From 2fd1800d6dcc1e8dd2184441315829b1cce1f56b Mon Sep 17 00:00:00 2001 From: ashishpatel26 Date: Thu, 28 May 2026 10:36:09 +0530 Subject: [PATCH] feat: add Ollama local LLM support alongside Gemini MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces a pluggable LLM provider system so users can run the app fully offline using a local Ollama server instead of Google Gemini. - Add src/services/gemini.service.js (extracted Gemini implementation) - Add src/services/ollama.service.js (Ollama /api/chat client with vision support for screenshot analysis and conversation history) - Convert src/services/llm.service.js into a dynamic provider router that delegates to gemini or ollama based on LLM_PROVIDER env var / runtime config — no restart required when switching - Extend src/core/config.js with llm.provider and llm.ollama config block - Add LLM_PROVIDER, OLLAMA_BASE_URL, OLLAMA_MODEL, OLLAMA_VISION_MODEL to env.example - Add "AI Provider" selector and Ollama settings section to settings.html; Gemini/Ollama panels toggle based on selection - Wire up new IPC handlers in main.js: get/set-llm-provider, get-ollama-status, test-ollama-connection - Expose new IPC methods in preload.js Co-Authored-By: Claude Sonnet 4.6 --- env.example | 18 +- main.js | 47 +- preload.js | 8 + settings.html | 53 ++ src/core/config.js | 12 + src/services/gemini.service.js | 973 ++++++++++++++++++++++++++ src/services/llm.service.js | 1204 +------------------------------- src/services/ollama.service.js | 533 ++++++++++++++ src/ui/settings-window.js | 34 + 9 files changed, 1700 insertions(+), 1182 deletions(-) create mode 100644 src/services/gemini.service.js create mode 100644 src/services/ollama.service.js diff --git a/env.example b/env.example index 6edb941e..571187cd 100644 --- a/env.example +++ b/env.example @@ -1,7 +1,21 @@ -# Google Gemini API Configuration -# Get your API key from: https://makersuite.google.com/app/apikey +# ── AI Provider 
────────────────────────────────────────────────────────────── +# Choose your LLM backend: 'gemini' (cloud, default) or 'ollama' (local) +LLM_PROVIDER=gemini + +# ── Google Gemini (cloud) ───────────────────────────────────────────────────── +# Required when LLM_PROVIDER=gemini +# Get your API key from: https://aistudio.google.com/ GEMINI_API_KEY=your_gemini_api_key_here +# ── Ollama (local) ──────────────────────────────────────────────────────────── +# Required when LLM_PROVIDER=ollama +# Install Ollama from https://ollama.ai, then: +# ollama pull llama3.2 # text model +# ollama pull llava # vision model (for screenshot analysis) +OLLAMA_BASE_URL=http://localhost:11434 +OLLAMA_MODEL=llama3.2 +OLLAMA_VISION_MODEL=llava + # Speech Recognition Configuration # Choose one provider: azure or whisper SPEECH_PROVIDER=whisper diff --git a/main.js b/main.js index 1984e166..5f71fdf8 100644 --- a/main.js +++ b/main.js @@ -446,6 +446,19 @@ class ApplicationController { } }); + ipcMain.handle("get-llm-provider", () => { + return { provider: llmService._getProviderName() }; + }); + + ipcMain.handle("set-llm-provider", (event, provider) => { + const p = String(provider).toLowerCase(); + process.env.LLM_PROVIDER = p; + const config = require('./src/core/config'); + config.set('llm.provider', p); + logger.info('LLM provider switched', { provider: p }); + return { provider: p }; + }); + ipcMain.handle("set-gemini-api-key", (event, apiKey) => { llmService.updateApiKey(apiKey); return llmService.getStats(); @@ -455,6 +468,16 @@ class ApplicationController { return llmService.getStats(); }); + ipcMain.handle("get-ollama-status", () => { + const ollamaService = require('./src/services/ollama.service'); + return ollamaService.getStats(); + }); + + ipcMain.handle("test-ollama-connection", async () => { + const ollamaService = require('./src/services/ollama.service'); + return await ollamaService.testConnection(); + }); + // Window binding IPC handlers ipcMain.handle("set-window-binding", 
(event, enabled) => { return windowManager.setWindowBinding(enabled); @@ -1073,13 +1096,19 @@ class ApplicationController { getSettings() { return { - codingLanguage: this.codingLanguage || "cpp", // Default to C++ + codingLanguage: this.codingLanguage || "cpp", activeSkill: this.activeSkill || "dsa", appIcon: this.appIcon || "terminal", selectedIcon: this.appIcon || "terminal", // pass through env-derived settings for UI convenience (masked) azureConfigured: !!process.env.AZURE_SPEECH_KEY && !!process.env.AZURE_SPEECH_REGION, - speechAvailable: this.speechAvailable + speechAvailable: this.speechAvailable, + // LLM provider + llmProvider: process.env.LLM_PROVIDER || "gemini", + // Ollama settings (safe to expose — no credentials) + ollamaBaseUrl: process.env.OLLAMA_BASE_URL || "http://localhost:11434", + ollamaModel: process.env.OLLAMA_MODEL || "llama3.2", + ollamaVisionModel: process.env.OLLAMA_VISION_MODEL || "llava" }; } @@ -1104,6 +1133,20 @@ class ApplicationController { this.appIcon = settings.appIcon; } + // LLM provider switch + if (settings.llmProvider) { + const p = String(settings.llmProvider).toLowerCase(); + process.env.LLM_PROVIDER = p; + const config = require('./src/core/config'); + config.set('llm.provider', p); + logger.info('LLM provider updated via settings', { provider: p }); + } + + // Ollama settings + if (settings.ollamaBaseUrl) process.env.OLLAMA_BASE_URL = settings.ollamaBaseUrl; + if (settings.ollamaModel) process.env.OLLAMA_MODEL = settings.ollamaModel; + if (settings.ollamaVisionModel) process.env.OLLAMA_VISION_MODEL = settings.ollamaVisionModel; + // Handle icon change specifically if (settings.selectedIcon) { this.appIcon = settings.selectedIcon; diff --git a/preload.js b/preload.js index 62f39852..45b256ca 100644 --- a/preload.js +++ b/preload.js @@ -30,10 +30,18 @@ contextBridge.exposeInMainWorld('electronAPI', { sendChatMessage: (text) => ipcRenderer.invoke('send-chat-message', text), getSkillPrompt: (skillName) => 
ipcRenderer.invoke('get-skill-prompt', skillName), + // LLM provider selection + getLlmProvider: () => ipcRenderer.invoke('get-llm-provider'), + setLlmProvider: (provider) => ipcRenderer.invoke('set-llm-provider', provider), + // Gemini LLM configuration setGeminiApiKey: (apiKey) => ipcRenderer.invoke('set-gemini-api-key', apiKey), getGeminiStatus: () => ipcRenderer.invoke('get-gemini-status'), testGeminiConnection: () => ipcRenderer.invoke('test-gemini-connection'), + + // Ollama configuration + getOllamaStatus: () => ipcRenderer.invoke('get-ollama-status'), + testOllamaConnection: () => ipcRenderer.invoke('test-ollama-connection'), // Settings showSettings: () => ipcRenderer.invoke('show-settings'), diff --git a/settings.html b/settings.html index e7402278..aa21598b 100644 --- a/settings.html +++ b/settings.html @@ -413,6 +413,25 @@
+
+ + AI Provider +
+
+
+
+
AI Backend
+
Gemini uses the cloud; Ollama runs models locally
+
+ +
+
+
+ +
Gemini Settings @@ -427,6 +446,40 @@
+ +
+
+ + Ollama Settings +
+
+
+
+
Ollama Base URL
+
URL where Ollama is running
+
+ +
+
+
+
Text Model
+
Model for text / chat (e.g. llama3.2, mistral, phi4)
+
+ +
+
+
+
Vision Model
+
Model for screenshot analysis (e.g. llava, llava-phi3)
+
+ +
+
+ Ollama must be running locally. Install from ollama.com and pull models first:
+ ollama pull llama3.2 && ollama pull llava +
+
+
diff --git a/src/core/config.js b/src/core/config.js index 35f7ca54..5742efbf 100644 --- a/src/core/config.js +++ b/src/core/config.js @@ -39,6 +39,7 @@ class ConfigManager { }, llm: { + provider: process.env.LLM_PROVIDER || 'gemini', // 'gemini' | 'ollama' gemini: { model: 'gemini-2.5-flash', maxRetries: 3, @@ -51,6 +52,17 @@ class ConfigManager { topP: 0.9, maxOutputTokens: 4096 } + }, + ollama: { + baseUrl: process.env.OLLAMA_BASE_URL || 'http://localhost:11434', + model: process.env.OLLAMA_MODEL || 'llama3.2', + visionModel: process.env.OLLAMA_VISION_MODEL || 'llava', + timeout: 120000, + fallbackEnabled: true, + generation: { + temperature: 0.7, + maxOutputTokens: 4096 + } } }, diff --git a/src/services/gemini.service.js b/src/services/gemini.service.js new file mode 100644 index 00000000..7f674d29 --- /dev/null +++ b/src/services/gemini.service.js @@ -0,0 +1,973 @@ +const { GoogleGenerativeAI } = require('@google/generative-ai'); +const logger = require('../core/logger').createServiceLogger('LLM'); +const config = require('../core/config'); +const { promptLoader } = require('../../prompt-loader'); + +class GeminiService { + constructor() { + this.client = null; + this.model = null; + this.isInitialized = false; + this.requestCount = 0; + this.errorCount = 0; + + this.initializeClient(); + } + + initializeClient() { + const apiKey = config.getApiKey('GEMINI'); + + if (!apiKey || apiKey === 'your-api-key-here') { + logger.warn('Gemini API key not configured', { + keyExists: !!apiKey, + isPlaceholder: apiKey === 'your-api-key-here' + }); + return; + } + + try { + this.client = new GoogleGenerativeAI(apiKey); + + const modelName = config.get('llm.gemini.model'); + this.model = this.client.getGenerativeModel({ + model: modelName, + generationConfig: this.getGenerationConfig() + }); + this.isInitialized = true; + + logger.info('Gemini AI client initialized successfully', { + model: modelName + }); + } catch (error) { + logger.error('Failed to initialize Gemini 
client', { + error: error.message + }); + } + } + + getGenerationConfig(overrides = {}) { + const defaults = config.get('llm.gemini.generation') || {}; + const fallback = { + temperature: 0.7, + topK: 40, + topP: 0.95, + maxOutputTokens: 4096 + }; + + const merged = { ...fallback, ...defaults, ...overrides }; + return Object.fromEntries( + Object.entries(merged).filter(([, value]) => value !== undefined && value !== null) + ); + } + + applyGenerationDefaults(request, overrides = {}) { + request.generationConfig = this.getGenerationConfig({ ...(request.generationConfig || {}), ...overrides }); + return request; + } + + extractTextFromCandidates(response) { + const candidates = Array.isArray(response?.candidates) + ? response.candidates + : Array.isArray(response) + ? response + : []; + + if (!candidates.length) { + throw new Error('No candidates in Gemini response'); + } + + const candidateWithText = candidates.find(candidate => { + const parts = candidate?.content?.parts; + return Array.isArray(parts) && parts.some(part => typeof part.text === 'string' && part.text.trim().length > 0); + }); + + if (!candidateWithText) { + const finishReasons = candidates.map(c => c.finishReason || 'unknown').join(', '); + throw new Error(`No text parts in candidates. Finish reasons: ${finishReasons}`); + } + + const textParts = candidateWithText.content.parts + .filter(part => typeof part.text === 'string' && part.text.trim().length > 0) + .map(part => part.text.trim()); + + if (!textParts.length) { + throw new Error(`Candidate parts missing text after filtering: ${JSON.stringify(candidateWithText)}`); + } + + const text = textParts.join('\n'); + + return { + text, + candidate: candidateWithText, + finishReason: candidateWithText.finishReason || null + }; + } + + async processImageWithSkill(imageBuffer, mimeType, activeSkill, sessionMemory = [], programmingLanguage = null) { + if (!this.isInitialized) { + throw new Error('LLM service not initialized. 
Check Gemini API key configuration.'); + } + + if (!imageBuffer || !Buffer.isBuffer(imageBuffer)) { + throw new Error('Invalid image buffer provided to processImageWithSkill'); + } + + const startTime = Date.now(); + this.requestCount++; + + try { + const { promptLoader } = require('../../prompt-loader'); + const skillPrompt = promptLoader.getSkillPrompt(activeSkill, programmingLanguage) || ''; + + const base64 = imageBuffer.toString('base64'); + + const request = { + contents: [ + { + role: 'user', + parts: [ + { text: this.formatImageInstruction(activeSkill, programmingLanguage) }, + { inlineData: { data: base64, mimeType } } + ] + } + ] + }; + + this.applyGenerationDefaults(request); + + if (skillPrompt && skillPrompt.trim().length > 0) { + request.systemInstruction = { parts: [{ text: skillPrompt }] }; + } + + let responseText; + const preferAlternative = !!config.get('llm.gemini.enableFallbackMethod'); + try { + if (preferAlternative) { + logger.debug('Attempting alternative HTTPS method first for reliability'); + responseText = await this.executeAlternativeRequest(request); + } else { + responseText = await this.executeRequest(request); + } + } catch (error) { + const secondaryLabel = preferAlternative ? 'primary SDK method' : 'alternative HTTPS method'; + logger.warn(`${preferAlternative ? 'Alternative' : 'Primary'} method failed, trying ${secondaryLabel}`, { error: error.message }); + const secondaryFn = preferAlternative ? this.executeRequest.bind(this) : this.executeAlternativeRequest.bind(this); + + try { + responseText = await secondaryFn(request); + } catch (secondaryError) { + logger.error('Both Gemini request methods failed', { + firstError: error.message, + secondError: secondaryError.message + }); + throw secondaryError; + } + } + + const finalResponse = programmingLanguage + ? 
this.enforceProgrammingLanguage(responseText, programmingLanguage) + : responseText; + + logger.logPerformance('LLM image processing', startTime, { + activeSkill, + imageSize: imageBuffer.length, + responseLength: finalResponse.length, + programmingLanguage: programmingLanguage || 'not specified', + requestId: this.requestCount + }); + + return { + response: finalResponse, + metadata: { + skill: activeSkill, + programmingLanguage, + processingTime: Date.now() - startTime, + requestId: this.requestCount, + usedFallback: false, + isImageAnalysis: true, + mimeType, + provider: 'gemini' + } + }; + } catch (error) { + this.errorCount++; + logger.error('LLM image processing failed', { + error: error.message, + activeSkill, + requestId: this.requestCount + }); + + if (config.get('llm.gemini.fallbackEnabled')) { + return this.generateFallbackResponse('[image]', activeSkill); + } + throw error; + } + } + + formatImageInstruction(activeSkill, programmingLanguage) { + const langNote = programmingLanguage ? ` Use only ${programmingLanguage.toUpperCase()} for any code.` : ''; + return `Analyze this image for a ${activeSkill.toUpperCase()} question. Extract the problem concisely and provide the best possible solution with explanation and final code.${langNote}`; + } + + async processTextWithSkill(text, activeSkill, sessionMemory = [], programmingLanguage = null) { + if (!this.isInitialized) { + throw new Error('LLM service not initialized. 
Check Gemini API key configuration.'); + } + + const startTime = Date.now(); + this.requestCount++; + + try { + logger.info('Processing text with LLM', { + activeSkill, + textLength: text.length, + hasSessionMemory: sessionMemory.length > 0, + programmingLanguage: programmingLanguage || 'not specified', + requestId: this.requestCount + }); + + const geminiRequest = this.buildGeminiRequest(text, activeSkill, sessionMemory, programmingLanguage); + + const preferAlternative = !!config.get('llm.gemini.enableFallbackMethod'); + let response; + try { + if (preferAlternative) { + logger.debug('Attempting alternative HTTPS method first for text processing'); + response = await this.executeAlternativeRequest(geminiRequest); + } else { + response = await this.executeRequest(geminiRequest); + } + } catch (error) { + const secondaryLabel = preferAlternative ? 'primary SDK method' : 'alternative HTTPS method'; + logger.warn(`${preferAlternative ? 'Alternative' : 'Primary'} method failed, trying ${secondaryLabel}`, { + error: error.message, + requestId: this.requestCount + }); + const secondaryFn = preferAlternative ? this.executeRequest.bind(this) : this.executeAlternativeRequest.bind(this); + try { + response = await secondaryFn(geminiRequest); + } catch (secondaryError) { + logger.error('Both Gemini request methods failed for text processing', { + firstError: error.message, + secondError: secondaryError.message, + requestId: this.requestCount + }); + throw secondaryError; + } + } + + const finalResponse = programmingLanguage + ? 
this.enforceProgrammingLanguage(response, programmingLanguage) + : response; + + logger.logPerformance('LLM text processing', startTime, { + activeSkill, + textLength: text.length, + responseLength: finalResponse.length, + programmingLanguage: programmingLanguage || 'not specified', + requestId: this.requestCount + }); + + return { + response: finalResponse, + metadata: { + skill: activeSkill, + programmingLanguage, + processingTime: Date.now() - startTime, + requestId: this.requestCount, + usedFallback: false, + provider: 'gemini' + } + }; + } catch (error) { + this.errorCount++; + logger.error('LLM processing failed', { + error: error.message, + activeSkill, + programmingLanguage: programmingLanguage || 'not specified', + requestId: this.requestCount + }); + + if (config.get('llm.gemini.fallbackEnabled')) { + return this.generateFallbackResponse(text, activeSkill); + } + + throw error; + } + } + + async processTranscriptionWithIntelligentResponse(text, activeSkill, sessionMemory = [], programmingLanguage = null) { + if (!this.isInitialized) { + throw new Error('LLM service not initialized. 
Check Gemini API key configuration.'); + } + + const startTime = Date.now(); + this.requestCount++; + + try { + logger.info('Processing transcription with intelligent response', { + activeSkill, + textLength: text.length, + hasSessionMemory: sessionMemory.length > 0, + programmingLanguage: programmingLanguage || 'not specified', + requestId: this.requestCount + }); + + const geminiRequest = this.buildIntelligentTranscriptionRequest(text, activeSkill, sessionMemory, programmingLanguage); + + const preferAlternative = !!config.get('llm.gemini.enableFallbackMethod'); + let response; + try { + if (preferAlternative) { + logger.debug('Attempting alternative HTTPS method first for transcription processing'); + response = await this.executeAlternativeRequest(geminiRequest); + } else { + response = await this.executeRequest(geminiRequest); + } + } catch (error) { + const secondaryLabel = preferAlternative ? 'primary SDK method' : 'alternative HTTPS method'; + logger.warn(`${preferAlternative ? 'Alternative' : 'Primary'} method failed, trying ${secondaryLabel}`, { + error: error.message, + requestId: this.requestCount + }); + const secondaryFn = preferAlternative ? this.executeRequest.bind(this) : this.executeAlternativeRequest.bind(this); + try { + response = await secondaryFn(geminiRequest); + } catch (secondaryError) { + logger.error('Both Gemini request methods failed for transcription processing', { + firstError: error.message, + secondError: secondaryError.message, + requestId: this.requestCount + }); + throw secondaryError; + } + } + + const finalResponse = programmingLanguage + ? 
this.enforceProgrammingLanguage(response, programmingLanguage) + : response; + + logger.logPerformance('LLM transcription processing', startTime, { + activeSkill, + textLength: text.length, + responseLength: finalResponse.length, + programmingLanguage: programmingLanguage || 'not specified', + requestId: this.requestCount + }); + + return { + response: finalResponse, + metadata: { + skill: activeSkill, + programmingLanguage, + processingTime: Date.now() - startTime, + requestId: this.requestCount, + usedFallback: false, + isTranscriptionResponse: true, + provider: 'gemini' + } + }; + } catch (error) { + this.errorCount++; + logger.error('LLM transcription processing failed', { + error: error.message, + activeSkill, + programmingLanguage: programmingLanguage || 'not specified', + requestId: this.requestCount + }); + + if (config.get('llm.gemini.fallbackEnabled')) { + return this.generateIntelligentFallbackResponse(text, activeSkill); + } + + throw error; + } + } + + enforceProgrammingLanguage(text, programmingLanguage) { + try { + if (!text || !programmingLanguage) return text; + const norm = String(programmingLanguage).toLowerCase(); + const fenceTagMap = { cpp: 'cpp', c: 'c', python: 'python', java: 'java', javascript: 'javascript', js: 'javascript' }; + const fenceTag = fenceTagMap[norm] || norm || 'text'; + + const replacedBackticks = text.replace(/```([^\n]*)\n/g, (match, info) => { + const current = (info || '').trim(); + if (current.split(/\s+/)[0].toLowerCase() === fenceTag) return match; + return '```' + fenceTag + '\n'; + }); + + const normalizedTildes = replacedBackticks.replace(/~~~([^\n]*)\n/g, () => '```' + fenceTag + '\n'); + + return normalizedTildes; + } catch (_) { + return text; + } + } + + buildGeminiRequest(text, activeSkill, sessionMemory, programmingLanguage) { + const sessionManager = require('../managers/session.manager'); + + if (sessionManager && typeof sessionManager.getConversationHistory === 'function') { + const conversationHistory = 
sessionManager.getConversationHistory(15); + const skillContext = sessionManager.getSkillContext(activeSkill, programmingLanguage); + return this.buildGeminiRequestWithHistory(text, activeSkill, conversationHistory, skillContext, programmingLanguage); + } + + const requestComponents = promptLoader.getRequestComponents( + activeSkill, + text, + sessionMemory, + programmingLanguage + ); + + const request = { + contents: [] + }; + + this.applyGenerationDefaults(request); + + if (requestComponents.shouldUseModelMemory && requestComponents.skillPrompt) { + request.systemInstruction = { + parts: [{ text: requestComponents.skillPrompt }] + }; + } + + request.contents.push({ + role: 'user', + parts: [{ text: this.formatUserMessage(text, activeSkill) }] + }); + + return request; + } + + buildGeminiRequestWithHistory(text, activeSkill, conversationHistory, skillContext, programmingLanguage) { + const request = { + contents: [] + }; + + this.applyGenerationDefaults(request); + + if (skillContext.skillPrompt) { + request.systemInstruction = { + parts: [{ text: skillContext.skillPrompt }] + }; + } + + const conversationContents = conversationHistory + .filter(event => { + return event.role !== 'system' && + event.content && + typeof event.content === 'string' && + event.content.trim().length > 0; + }) + .map(event => { + const content = event.content.trim(); + return { + role: event.role === 'model' ? 
'model' : 'user', + parts: [{ text: content }] + }; + }); + + request.contents.push(...conversationContents); + + const formattedMessage = this.formatUserMessage(text, activeSkill); + if (!formattedMessage || formattedMessage.trim().length === 0) { + throw new Error('Failed to format user message or message is empty'); + } + + request.contents.push({ + role: 'user', + parts: [{ text: formattedMessage }] + }); + + return request; + } + + buildIntelligentTranscriptionRequest(text, activeSkill, sessionMemory, programmingLanguage) { + const cleanText = text && typeof text === 'string' ? text.trim() : ''; + if (!cleanText) { + throw new Error('Empty or invalid transcription text provided to buildIntelligentTranscriptionRequest'); + } + + const sessionManager = require('../managers/session.manager'); + + if (sessionManager && typeof sessionManager.getConversationHistory === 'function') { + const conversationHistory = sessionManager.getConversationHistory(10); + const skillContext = sessionManager.getSkillContext(activeSkill, programmingLanguage); + return this.buildIntelligentTranscriptionRequestWithHistory(cleanText, activeSkill, conversationHistory, skillContext, programmingLanguage); + } + + const request = { + contents: [] + }; + + this.applyGenerationDefaults(request); + + const intelligentPrompt = this.getIntelligentTranscriptionPrompt(activeSkill, programmingLanguage); + if (!intelligentPrompt) { + throw new Error('Failed to generate intelligent transcription prompt'); + } + + request.systemInstruction = { + parts: [{ text: intelligentPrompt }] + }; + + request.contents.push({ + role: 'user', + parts: [{ text: cleanText }] + }); + + return request; + } + + buildIntelligentTranscriptionRequestWithHistory(text, activeSkill, conversationHistory, skillContext, programmingLanguage) { + const request = { + contents: [] + }; + + this.applyGenerationDefaults(request); + + const intelligentPrompt = this.getIntelligentTranscriptionPrompt(activeSkill, programmingLanguage); + 
request.systemInstruction = { parts: [{ text: intelligentPrompt }] }; + + const conversationContents = conversationHistory + .filter(event => { + return event.role !== 'system' && + event.content && + typeof event.content === 'string' && + event.content.trim().length > 0; + }) + .slice(-8) + .map(event => { + const content = event.content.trim(); + if (!content) return null; + return { + role: event.role === 'model' ? 'model' : 'user', + parts: [{ text: content }] + }; + }) + .filter(content => content !== null); + + request.contents.push(...conversationContents); + + const cleanText = text && typeof text === 'string' ? text.trim() : ''; + if (!cleanText) { + throw new Error('Empty or invalid transcription text provided'); + } + + request.contents.push({ + role: 'user', + parts: [{ text: cleanText }] + }); + + if (request.contents.length === 0) { + throw new Error('No valid content to send to Gemini API'); + } + + return request; + } + + getIntelligentTranscriptionPrompt(activeSkill, programmingLanguage) { + let prompt = `# Intelligent Transcription Response System + +Assume you are asked a question in ${activeSkill.toUpperCase()} mode. Your job is to intelligently respond to question/message with appropriate brevity. +Assume you are in an interview and you need to perform best in ${activeSkill.toUpperCase()} mode. 
+Always respond to the point, do not repeat the question or unnecessary information which is not related to ${activeSkill}.`; + + if (programmingLanguage) { + const lang = String(programmingLanguage).toLowerCase(); + const languageMap = { cpp: 'C++', c: 'C', python: 'Python', java: 'Java', javascript: 'JavaScript', js: 'JavaScript' }; + const fenceTagMap = { cpp: 'cpp', c: 'c', python: 'python', java: 'java', javascript: 'javascript', js: 'javascript' }; + const languageTitle = languageMap[lang] || (lang.charAt(0).toUpperCase() + lang.slice(1)); + const fenceTag = fenceTagMap[lang] || lang || 'text'; + prompt += `\n\nCODING CONTEXT: Respond ONLY in ${languageTitle}. All code blocks must use triple backticks with language tag \`\`\`${fenceTag}\`\`\`. Do not include other languages unless explicitly asked.`; + } + + prompt += ` + +## Response Rules: + +### If the transcription is casual conversation, greetings, or NOT related to ${activeSkill}: +- Respond with: "Yeah, I'm listening. Ask your question relevant to ${activeSkill}." +- Or similar brief acknowledgments like: "I'm here, what's your ${activeSkill} question?" + +### If the transcription IS relevant to ${activeSkill} or is a follow-up question: +- Provide a comprehensive, detailed response +- Use bullet points, examples, and explanations +- Focus on actionable insights and complete answers +- Do not truncate or shorten your response + +### Examples of casual/irrelevant messages: +- "Hello", "Hi there", "How are you?" +- "What's the weather like?" 
+- "I'm just testing this" +- Random conversations not related to ${activeSkill} + +### Examples of relevant messages: +- Actual questions about ${activeSkill} concepts +- Follow-up questions to previous responses +- Requests for clarification on ${activeSkill} topics +- Problem-solving requests related to ${activeSkill} + +## Response Format: +- Keep responses detailed +- Use bullet points for structured answers +- Be encouraging and helpful +- Stay focused on ${activeSkill} + +If the user's input is a coding or DSA problem statement and contains no code, produce a complete, runnable solution in the selected programming language without asking for more details. Always include the final implementation in a properly tagged code block. + +Remember: Be intelligent about filtering - only provide detailed responses when the user actually needs help with ${activeSkill}.`; + + return prompt; + } + + formatUserMessage(text, activeSkill) { + return `Context: ${activeSkill.toUpperCase()} analysis request\n\nText to analyze:\n${text}`; + } + + async executeRequest(geminiRequest) { + const maxRetries = config.get('llm.gemini.maxRetries'); + const timeout = config.get('llm.gemini.timeout'); + + for (let attempt = 1; attempt <= maxRetries; attempt++) { + try { + await this.performPreflightCheck(); + + const timeoutPromise = new Promise((_, reject) => + setTimeout(() => reject(new Error('Request timeout')), timeout) + ); + + const requestPromise = this.model.generateContent(geminiRequest); + const result = await Promise.race([requestPromise, timeoutPromise]); + + if (!result.response) { + throw new Error('Empty response from Gemini API'); + } + + const { text, finishReason } = this.extractTextFromCandidates(result.response); + + if (finishReason === 'MAX_TOKENS') { + logger.warn('Gemini primary response reached max tokens limit', { attempt, finishReason }); + } + + return text; + } catch (error) { + const errorInfo = this.analyzeError(error); + + logger.warn(`Gemini API attempt 
${attempt} failed`, { + error: error.message, + errorType: errorInfo.type, + remainingAttempts: maxRetries - attempt + }); + + if (attempt === maxRetries) { + const finalError = new Error(`Gemini API failed after ${maxRetries} attempts: ${error.message}`); + finalError.errorAnalysis = errorInfo; + finalError.originalError = error; + throw finalError; + } + + const baseDelay = errorInfo.isNetworkError ? 2500 : 1500; + const delay = baseDelay * attempt + Math.random() * 1000; + await this.delay(delay); + } + } + } + + async performPreflightCheck() { + try { + await this.testNetworkConnection({ + host: 'generativelanguage.googleapis.com', + port: 443, + name: 'Gemini API Endpoint' + }); + } catch (error) { + logger.warn('Preflight check failed', { error: error.message }); + } + } + + getUserAgent() { + try { + if (typeof navigator !== 'undefined' && navigator.userAgent) { + return navigator.userAgent; + } + return `Node.js/${process.version} (${process.platform}; ${process.arch})`; + } catch { + return 'Unknown'; + } + } + + analyzeError(error) { + const errorMessage = error.message.toLowerCase(); + + if (errorMessage.includes('fetch failed') || + errorMessage.includes('network error') || + errorMessage.includes('enotfound') || + errorMessage.includes('econnrefused') || + errorMessage.includes('timeout')) { + return { type: 'NETWORK_ERROR', isNetworkError: true, suggestedAction: 'Check internet connection and firewall settings' }; + } + + if (errorMessage.includes('unauthorized') || + errorMessage.includes('invalid api key') || + errorMessage.includes('forbidden')) { + return { type: 'AUTH_ERROR', isNetworkError: false, suggestedAction: 'Verify Gemini API key configuration' }; + } + + if (errorMessage.includes('quota') || + errorMessage.includes('rate limit') || + errorMessage.includes('too many requests')) { + return { type: 'RATE_LIMIT_ERROR', isNetworkError: false, suggestedAction: 'Wait before retrying or check API quota' }; + } + + if 
(errorMessage.includes('request timeout') || errorMessage.includes('etimedout')) { + return { type: 'TIMEOUT_ERROR', isNetworkError: true, suggestedAction: 'Check network latency or increase timeout' }; + } + + return { type: 'UNKNOWN_ERROR', isNetworkError: false, suggestedAction: 'Check logs for more details' }; + } + + async checkNetworkConnectivity() { + const connectivityTests = [ + { host: 'google.com', port: 443, name: 'Google (HTTPS)' }, + { host: 'generativelanguage.googleapis.com', port: 443, name: 'Gemini API Endpoint' } + ]; + + const results = await Promise.allSettled( + connectivityTests.map(test => this.testNetworkConnection(test)) + ); + + const connectivity = { + timestamp: new Date().toISOString(), + tests: results.map((result, index) => ({ + ...connectivityTests[index], + success: result.status === 'fulfilled' && result.value, + error: result.status === 'rejected' ? result.reason.message : null + })) + }; + + logger.info('Network connectivity check completed', connectivity); + return connectivity; + } + + async testNetworkConnection({ host, port, name }) { + return new Promise((resolve, reject) => { + const net = require('net'); + const socket = new net.Socket(); + + const timeout = setTimeout(() => { + socket.destroy(); + reject(new Error(`Connection timeout to ${host}:${port}`)); + }, 5000); + + socket.on('connect', () => { + clearTimeout(timeout); + socket.destroy(); + resolve(true); + }); + + socket.on('error', (error) => { + clearTimeout(timeout); + reject(new Error(`Connection failed to ${host}:${port}: ${error.message}`)); + }); + + socket.connect(port, host); + }); + } + + generateFallbackResponse(text, activeSkill) { + logger.info('Generating fallback response', { activeSkill }); + + const fallbackResponses = { + 'dsa': 'This appears to be a data structures and algorithms problem. 
Consider breaking it down into smaller components and identifying the appropriate algorithm or data structure to use.', + 'system-design': 'For this system design question, consider scalability, reliability, and the trade-offs between different architectural approaches.', + 'programming': 'This looks like a programming challenge. Focus on understanding the requirements, edge cases, and optimal time/space complexity.', + 'default': 'I can help analyze this content. Please ensure your Gemini API key is properly configured for detailed analysis.' + }; + + const response = fallbackResponses[activeSkill] || fallbackResponses.default; + + return { + response, + metadata: { + skill: activeSkill, + processingTime: 0, + requestId: this.requestCount, + usedFallback: true, + provider: 'gemini' + } + }; + } + + generateIntelligentFallbackResponse(text, activeSkill) { + logger.info('Generating intelligent fallback response for transcription', { activeSkill }); + + const skillKeywords = { + 'dsa': ['algorithm', 'data structure', 'array', 'tree', 'graph', 'sort', 'search', 'complexity', 'big o'], + 'programming': ['code', 'function', 'variable', 'class', 'method', 'bug', 'debug', 'syntax'], + 'system-design': ['scalability', 'database', 'architecture', 'microservice', 'load balancer', 'cache'], + 'behavioral': ['interview', 'experience', 'situation', 'leadership', 'conflict', 'team'], + 'sales': ['customer', 'deal', 'negotiation', 'price', 'revenue', 'prospect'], + 'presentation': ['slide', 'audience', 'public speaking', 'presentation', 'nervous'], + 'data-science': ['data', 'model', 'machine learning', 'statistics', 'analytics', 'python', 'pandas'], + 'devops': ['deployment', 'ci/cd', 'docker', 'kubernetes', 'infrastructure', 'monitoring'], + 'negotiation': ['negotiate', 'compromise', 'agreement', 'terms', 'conflict resolution'] + }; + + const textLower = text.toLowerCase(); + const relevantKeywords = skillKeywords[activeSkill] || []; + const hasRelevantKeywords = 
relevantKeywords.some(keyword => textLower.includes(keyword)); + + const questionIndicators = ['how', 'what', 'why', 'when', 'where', 'can you', 'could you', 'should i', '?']; + const seemsLikeQuestion = questionIndicators.some(indicator => textLower.includes(indicator)); + + let response; + if (hasRelevantKeywords || seemsLikeQuestion) { + response = `I'm having trouble processing that right now, but it sounds like a ${activeSkill} question. Could you rephrase or ask more specifically about what you need help with?`; + } else { + response = `Yeah, I'm listening. Ask your question relevant to ${activeSkill}.`; + } + + return { + response, + metadata: { + skill: activeSkill, + processingTime: 0, + requestId: this.requestCount, + usedFallback: true, + isTranscriptionResponse: true, + provider: 'gemini' + } + }; + } + + async testConnection() { + if (!this.isInitialized) { + return { success: false, error: 'Service not initialized', provider: 'gemini' }; + } + + try { + const networkCheck = await this.checkNetworkConnectivity(); + const hasNetworkIssues = networkCheck.tests.some(test => !test.success); + + if (hasNetworkIssues) { + logger.warn('Network connectivity issues detected', networkCheck); + } + + const testRequest = { + contents: [{ + role: 'user', + parts: [{ text: 'Test connection. Please respond with "OK".' 
}] + }] + }; + + this.applyGenerationDefaults(testRequest, { temperature: 0, maxOutputTokens: 10 }); + + const startTime = Date.now(); + const result = await this.model.generateContent(testRequest); + const latency = Date.now() - startTime; + const { text } = this.extractTextFromCandidates(result.response); + + return { + success: true, + response: text, + latency, + networkConnectivity: networkCheck, + provider: 'gemini' + }; + } catch (error) { + const errorAnalysis = this.analyzeError(error); + return { + success: false, + error: error.message, + errorAnalysis, + networkConnectivity: await this.checkNetworkConnectivity().catch(() => null), + provider: 'gemini' + }; + } + } + + updateApiKey(newApiKey) { + process.env.GEMINI_API_KEY = newApiKey; + this.isInitialized = false; + this.initializeClient(); + logger.info('API key updated and client reinitialized'); + } + + getStats() { + return { + isInitialized: this.isInitialized, + requestCount: this.requestCount, + errorCount: this.errorCount, + successRate: this.requestCount > 0 ? 
((this.requestCount - this.errorCount) / this.requestCount) * 100 : 0, + config: config.get('llm.gemini'), + provider: 'gemini' + }; + } + + delay(ms) { + return new Promise(resolve => setTimeout(resolve, ms)); + } + + async executeAlternativeRequest(geminiRequest) { + const https = require('https'); + const apiKey = config.getApiKey('GEMINI'); + const model = config.get('llm.gemini.model'); + + logger.info('Using alternative HTTPS request method'); + + const url = `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent`; + const postData = JSON.stringify(geminiRequest); + const agent = new https.Agent({ keepAlive: true, maxSockets: 1 }); + + const options = { + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'x-goog-api-key': apiKey, + 'Content-Length': Buffer.byteLength(postData), + 'User-Agent': this.getUserAgent() + }, + timeout: config.get('llm.gemini.timeout'), + agent + }; + + return new Promise((resolve, reject) => { + const req = https.request(url, options, (res) => { + let data = ''; + + res.on('data', (chunk) => { data += chunk; }); + + res.on('end', () => { + try { + if (res.statusCode !== 200) { + reject(new Error(`HTTP ${res.statusCode}: ${data}`)); + return; + } + + const response = JSON.parse(data); + const { text, finishReason } = this.extractTextFromCandidates(response); + + if (finishReason === 'MAX_TOKENS') { + logger.warn('Gemini alternative response reached max tokens limit', { finishReason }); + } + + resolve(text.trim()); + } catch (parseError) { + reject(new Error(`Failed to parse response: ${parseError.message}`)); + } + }); + }); + + req.on('error', (error) => { reject(new Error(`Alternative request failed: ${error.message}`)); }); + req.on('timeout', () => { req.destroy(); reject(new Error('Alternative request timeout')); }); + + req.write(postData); + req.end(); + }); + } +} + +module.exports = new GeminiService(); diff --git a/src/services/llm.service.js b/src/services/llm.service.js index 
0384691d..ebcf6e51 100644 --- a/src/services/llm.service.js +++ b/src/services/llm.service.js @@ -1,1184 +1,32 @@ -const { GoogleGenerativeAI } = require('@google/generative-ai'); -const logger = require('../core/logger').createServiceLogger('LLM'); const config = require('../core/config'); -const { promptLoader } = require('../../prompt-loader'); +const logger = require('../core/logger').createServiceLogger('LLMProvider'); -class LLMService { - constructor() { - this.client = null; - this.model = null; - this.isInitialized = false; - this.requestCount = 0; - this.errorCount = 0; - - this.initializeClient(); - } - - initializeClient() { - const apiKey = config.getApiKey('GEMINI'); - - if (!apiKey || apiKey === 'your-api-key-here') { - logger.warn('Gemini API key not configured', { - keyExists: !!apiKey, - isPlaceholder: apiKey === 'your-api-key-here' - }); - return; - } - - try { - this.client = new GoogleGenerativeAI(apiKey); - - // Use the correct model name for v1 API - const modelName = config.get('llm.gemini.model'); - this.model = this.client.getGenerativeModel({ - model: modelName, - generationConfig: this.getGenerationConfig() - }); - this.isInitialized = true; - - logger.info('Gemini AI client initialized successfully', { - model: modelName - }); - } catch (error) { - logger.error('Failed to initialize Gemini client', { - error: error.message - }); - } - } - - getGenerationConfig(overrides = {}) { - const defaults = config.get('llm.gemini.generation') || {}; - const fallback = { - temperature: 0.7, - topK: 40, - topP: 0.95, - maxOutputTokens: 4096 - }; - - const merged = { ...fallback, ...defaults, ...overrides }; - return Object.fromEntries( - Object.entries(merged).filter(([, value]) => value !== undefined && value !== null) - ); - } - - applyGenerationDefaults(request, overrides = {}) { - request.generationConfig = this.getGenerationConfig({ ...(request.generationConfig || {}), ...overrides }); - return request; - } - - 
extractTextFromCandidates(response) { - const candidates = Array.isArray(response?.candidates) - ? response.candidates - : Array.isArray(response) - ? response - : []; - - if (!candidates.length) { - throw new Error('No candidates in Gemini response'); - } - - const candidateWithText = candidates.find(candidate => { - const parts = candidate?.content?.parts; - return Array.isArray(parts) && parts.some(part => typeof part.text === 'string' && part.text.trim().length > 0); - }); - - if (!candidateWithText) { - const finishReasons = candidates.map(c => c.finishReason || 'unknown').join(', '); - throw new Error(`No text parts in candidates. Finish reasons: ${finishReasons}`); - } - - const textParts = candidateWithText.content.parts - .filter(part => typeof part.text === 'string' && part.text.trim().length > 0) - .map(part => part.text.trim()); - - if (!textParts.length) { - throw new Error(`Candidate parts missing text after filtering: ${JSON.stringify(candidateWithText)}`); - } - - const text = textParts.join('\n'); - - return { - text, - candidate: candidateWithText, - finishReason: candidateWithText.finishReason || null - }; - } - - /** - * Process an image directly with Gemini using the active skill prompt. - * The image buffer is sent as inlineData alongside a concise instruction. - * For image-based queries, we include the skill prompt (e.g., DSA) as systemInstruction. - * @param {Buffer} imageBuffer - PNG/JPEG image bytes - * @param {string} mimeType - e.g., 'image/png' or 'image/jpeg' - * @param {string} activeSkill - current skill (e.g. 
'dsa') - * @param {Array} sessionMemory - optional (not required for image) - * @param {string|null} programmingLanguage - optional language context for skills that need it - * @returns {Promise<{response: string, metadata: object}>} - */ - async processImageWithSkill(imageBuffer, mimeType, activeSkill, sessionMemory = [], programmingLanguage = null) { - if (!this.isInitialized) { - throw new Error('LLM service not initialized. Check Gemini API key configuration.'); - } - - if (!imageBuffer || !Buffer.isBuffer(imageBuffer)) { - throw new Error('Invalid image buffer provided to processImageWithSkill'); - } - - const startTime = Date.now(); - this.requestCount++; - - try { - // Build system instruction using the skill prompt (with optional language injection) - const { promptLoader } = require('../../prompt-loader'); - const skillPrompt = promptLoader.getSkillPrompt(activeSkill, programmingLanguage) || ''; - - // Build request with text + image parts - const base64 = imageBuffer.toString('base64'); - - const request = { - contents: [ - { - role: 'user', - parts: [ - { text: this.formatImageInstruction(activeSkill, programmingLanguage) }, - { inlineData: { data: base64, mimeType } } - ] - } - ] - }; - - this.applyGenerationDefaults(request); - - if (skillPrompt && skillPrompt.trim().length > 0) { - request.systemInstruction = { parts: [{ text: skillPrompt }] }; - } - - // Execute with retries/timeout - try alternative method first for network reliability - let responseText; - const preferAlternative = !!config.get('llm.gemini.enableFallbackMethod'); - try { - if (preferAlternative) { - logger.debug('Attempting alternative HTTPS method first for reliability'); - responseText = await this.executeAlternativeRequest(request); - } else { - responseText = await this.executeRequest(request); - } - } catch (error) { - const secondaryLabel = preferAlternative ? 'primary SDK method' : 'alternative HTTPS method'; - logger.warn(`${preferAlternative ? 
'Alternative' : 'Primary'} method failed, trying ${secondaryLabel}`, { error: error.message }); - const secondaryFn = preferAlternative ? this.executeRequest.bind(this) : this.executeAlternativeRequest.bind(this); - - try { - responseText = await secondaryFn(request); - } catch (secondaryError) { - logger.error('Both Gemini request methods failed', { - firstError: error.message, - secondError: secondaryError.message - }); - throw secondaryError; - } - } - - // Enforce language in code fences if provided - const finalResponse = programmingLanguage - ? this.enforceProgrammingLanguage(responseText, programmingLanguage) - : responseText; - - logger.logPerformance('LLM image processing', startTime, { - activeSkill, - imageSize: imageBuffer.length, - responseLength: finalResponse.length, - programmingLanguage: programmingLanguage || 'not specified', - requestId: this.requestCount - }); - - return { - response: finalResponse, - metadata: { - skill: activeSkill, - programmingLanguage, - processingTime: Date.now() - startTime, - requestId: this.requestCount, - usedFallback: false, - isImageAnalysis: true, - mimeType - } - }; - } catch (error) { - this.errorCount++; - logger.error('LLM image processing failed', { - error: error.message, - activeSkill, - requestId: this.requestCount - }); - - if (config.get('llm.gemini.fallbackEnabled')) { - return this.generateFallbackResponse('[image]', activeSkill); - } - throw error; - } - } - - formatImageInstruction(activeSkill, programmingLanguage) { - const langNote = programmingLanguage ? ` Use only ${programmingLanguage.toUpperCase()} for any code.` : ''; - return `Analyze this image for a ${activeSkill.toUpperCase()} question. Extract the problem concisely and provide the best possible solution with explanation and final code.${langNote}`; - } - - async processTextWithSkill(text, activeSkill, sessionMemory = [], programmingLanguage = null) { - if (!this.isInitialized) { - throw new Error('LLM service not initialized. 
Check Gemini API key configuration.'); - } - - const startTime = Date.now(); - this.requestCount++; - - try { - logger.info('Processing text with LLM', { - activeSkill, - textLength: text.length, - hasSessionMemory: sessionMemory.length > 0, - programmingLanguage: programmingLanguage || 'not specified', - requestId: this.requestCount - }); - - const geminiRequest = this.buildGeminiRequest(text, activeSkill, sessionMemory, programmingLanguage); - - const preferAlternative = !!config.get('llm.gemini.enableFallbackMethod'); - let response; - try { - if (preferAlternative) { - logger.debug('Attempting alternative HTTPS method first for text processing'); - response = await this.executeAlternativeRequest(geminiRequest); - } else { - response = await this.executeRequest(geminiRequest); - } - } catch (error) { - const secondaryLabel = preferAlternative ? 'primary SDK method' : 'alternative HTTPS method'; - logger.warn(`${preferAlternative ? 'Alternative' : 'Primary'} method failed, trying ${secondaryLabel}`, { - error: error.message, - requestId: this.requestCount - }); - const secondaryFn = preferAlternative ? this.executeRequest.bind(this) : this.executeAlternativeRequest.bind(this); - try { - response = await secondaryFn(geminiRequest); - } catch (secondaryError) { - logger.error('Both Gemini request methods failed for text processing', { - firstError: error.message, - secondError: secondaryError.message, - requestId: this.requestCount - }); - throw secondaryError; - } - } - - // Enforce language in code fences if programmingLanguage specified - const finalResponse = programmingLanguage - ? 
this.enforceProgrammingLanguage(response, programmingLanguage) - : response; - - logger.logPerformance('LLM text processing', startTime, { - activeSkill, - textLength: text.length, - responseLength: finalResponse.length, - programmingLanguage: programmingLanguage || 'not specified', - requestId: this.requestCount - }); - - return { - response: finalResponse, - metadata: { - skill: activeSkill, - programmingLanguage, - processingTime: Date.now() - startTime, - requestId: this.requestCount, - usedFallback: false - } - }; - } catch (error) { - this.errorCount++; - logger.error('LLM processing failed', { - error: error.message, - activeSkill, - programmingLanguage: programmingLanguage || 'not specified', - requestId: this.requestCount - }); - - if (config.get('llm.gemini.fallbackEnabled')) { - return this.generateFallbackResponse(text, activeSkill); - } - - throw error; - } - } - - async processTranscriptionWithIntelligentResponse(text, activeSkill, sessionMemory = [], programmingLanguage = null) { - if (!this.isInitialized) { - throw new Error('LLM service not initialized. Check Gemini API key configuration.'); - } - - const startTime = Date.now(); - this.requestCount++; - - try { - logger.info('Processing transcription with intelligent response', { - activeSkill, - textLength: text.length, - hasSessionMemory: sessionMemory.length > 0, - programmingLanguage: programmingLanguage || 'not specified', - requestId: this.requestCount - }); - - const geminiRequest = this.buildIntelligentTranscriptionRequest(text, activeSkill, sessionMemory, programmingLanguage); - - const preferAlternative = !!config.get('llm.gemini.enableFallbackMethod'); - let response; - try { - if (preferAlternative) { - logger.debug('Attempting alternative HTTPS method first for transcription processing'); - response = await this.executeAlternativeRequest(geminiRequest); - } else { - response = await this.executeRequest(geminiRequest); - } - } catch (error) { - const secondaryLabel = preferAlternative ? 
'primary SDK method' : 'alternative HTTPS method'; - logger.warn(`${preferAlternative ? 'Alternative' : 'Primary'} method failed, trying ${secondaryLabel}`, { - error: error.message, - requestId: this.requestCount - }); - const secondaryFn = preferAlternative ? this.executeRequest.bind(this) : this.executeAlternativeRequest.bind(this); - try { - response = await secondaryFn(geminiRequest); - } catch (secondaryError) { - logger.error('Both Gemini request methods failed for transcription processing', { - firstError: error.message, - secondError: secondaryError.message, - requestId: this.requestCount - }); - throw secondaryError; - } - } - - // Enforce language in code fences if programmingLanguage specified - const finalResponse = programmingLanguage - ? this.enforceProgrammingLanguage(response, programmingLanguage) - : response; - - logger.logPerformance('LLM transcription processing', startTime, { - activeSkill, - textLength: text.length, - responseLength: finalResponse.length, - programmingLanguage: programmingLanguage || 'not specified', - requestId: this.requestCount - }); - - return { - response: finalResponse, - metadata: { - skill: activeSkill, - programmingLanguage, - processingTime: Date.now() - startTime, - requestId: this.requestCount, - usedFallback: false, - isTranscriptionResponse: true - } - }; - } catch (error) { - this.errorCount++; - logger.error('LLM transcription processing failed', { - error: error.message, - activeSkill, - programmingLanguage: programmingLanguage || 'not specified', - requestId: this.requestCount - }); - - if (config.get('llm.gemini.fallbackEnabled')) { - return this.generateIntelligentFallbackResponse(text, activeSkill); - } - - throw error; - } - } - - /** - * Normalize all triple-backtick code fences to the selected programming language tag. - * Does not alter the inner code; only ensures fence language tags are correct. 
- */ - enforceProgrammingLanguage(text, programmingLanguage) { - try { - if (!text || !programmingLanguage) return text; - const norm = String(programmingLanguage).toLowerCase(); - const fenceTagMap = { cpp: 'cpp', c: 'c', python: 'python', java: 'java', javascript: 'javascript', js: 'javascript' }; - const fenceTag = fenceTagMap[norm] || norm || 'text'; - - // Replace all triple-backtick fences' language token with the selected tag - const replacedBackticks = text.replace(/```([^\n]*)\n/g, (match, info) => { - const current = (info || '').trim(); - // If already the desired fenceTag as the first token, keep as is - if (current.split(/\s+/)[0].toLowerCase() === fenceTag) return match; - return '```' + fenceTag + '\n'; - }); - - // Optionally normalize tildes fences to backticks with correct tag - const normalizedTildes = replacedBackticks.replace(/~~~([^\n]*)\n/g, () => '```' + fenceTag + '\n'); - - return normalizedTildes; - } catch (_) { - return text; - } - } - - buildGeminiRequest(text, activeSkill, sessionMemory, programmingLanguage) { - // Check if we have the new conversation history format - const sessionManager = require('../managers/session.manager'); - - if (sessionManager && typeof sessionManager.getConversationHistory === 'function') { - const conversationHistory = sessionManager.getConversationHistory(15); - const skillContext = sessionManager.getSkillContext(activeSkill, programmingLanguage); - return this.buildGeminiRequestWithHistory(text, activeSkill, conversationHistory, skillContext, programmingLanguage); - } - - // Fallback to old method for compatibility - now with programming language support - const requestComponents = promptLoader.getRequestComponents( - activeSkill, - text, - sessionMemory, - programmingLanguage - ); - - const request = { - contents: [] - }; - - this.applyGenerationDefaults(request); - - // Use the skill prompt that already has programming language injected - if (requestComponents.shouldUseModelMemory && 
requestComponents.skillPrompt) { - request.systemInstruction = { - parts: [{ text: requestComponents.skillPrompt }] - }; - - logger.debug('Using language-enhanced system instruction for skill', { - skill: activeSkill, - programmingLanguage: programmingLanguage || 'not specified', - promptLength: requestComponents.skillPrompt.length, - requiresProgrammingLanguage: requestComponents.requiresProgrammingLanguage - }); - } - - request.contents.push({ - role: 'user', - parts: [{ text: this.formatUserMessage(text, activeSkill) }] - }); - - return request; - } - - buildGeminiRequestWithHistory(text, activeSkill, conversationHistory, skillContext, programmingLanguage) { - const request = { - contents: [] - }; - - this.applyGenerationDefaults(request); - - // Use the skill prompt from context (which may already include programming language) - if (skillContext.skillPrompt) { - request.systemInstruction = { - parts: [{ text: skillContext.skillPrompt }] - }; - - logger.debug('Using skill context prompt as system instruction', { - skill: activeSkill, - programmingLanguage: programmingLanguage || 'not specified', - promptLength: skillContext.skillPrompt.length, - requiresProgrammingLanguage: skillContext.requiresProgrammingLanguage || false, - hasLanguageInjection: programmingLanguage && skillContext.requiresProgrammingLanguage - }); - } - - // Add conversation history (excluding system messages) with validation - const conversationContents = conversationHistory - .filter(event => { - return event.role !== 'system' && - event.content && - typeof event.content === 'string' && - event.content.trim().length > 0; - }) - .map(event => { - const content = event.content.trim(); - return { - role: event.role === 'model' ? 
'model' : 'user', - parts: [{ text: content }] - }; - }); - - // Add the conversation history - request.contents.push(...conversationContents); - - // Format and validate the current user input - const formattedMessage = this.formatUserMessage(text, activeSkill); - if (!formattedMessage || formattedMessage.trim().length === 0) { - throw new Error('Failed to format user message or message is empty'); - } - - // Add the current user input - request.contents.push({ - role: 'user', - parts: [{ text: formattedMessage }] - }); - - logger.debug('Built Gemini request with conversation history', { - skill: activeSkill, - programmingLanguage: programmingLanguage || 'not specified', - historyLength: conversationHistory.length, - totalContents: request.contents.length, - hasSystemInstruction: !!request.systemInstruction, - requiresProgrammingLanguage: skillContext.requiresProgrammingLanguage || false - }); - - return request; - } - - buildIntelligentTranscriptionRequest(text, activeSkill, sessionMemory, programmingLanguage) { - // Validate input text first - const cleanText = text && typeof text === 'string' ? 
text.trim() : ''; - if (!cleanText) { - throw new Error('Empty or invalid transcription text provided to buildIntelligentTranscriptionRequest'); - } - - // Check if we have the new conversation history format - const sessionManager = require('../managers/session.manager'); - - if (sessionManager && typeof sessionManager.getConversationHistory === 'function') { - const conversationHistory = sessionManager.getConversationHistory(10); - const skillContext = sessionManager.getSkillContext(activeSkill, programmingLanguage); - return this.buildIntelligentTranscriptionRequestWithHistory(cleanText, activeSkill, conversationHistory, skillContext, programmingLanguage); - } - - // Fallback to basic intelligent request - const request = { - contents: [] - }; - - this.applyGenerationDefaults(request); - - // Add intelligent filtering system instruction - const intelligentPrompt = this.getIntelligentTranscriptionPrompt(activeSkill, programmingLanguage); - if (!intelligentPrompt) { - throw new Error('Failed to generate intelligent transcription prompt'); - } - - request.systemInstruction = { - parts: [{ text: intelligentPrompt }] - }; - - request.contents.push({ - role: 'user', - parts: [{ text: cleanText }] - }); - - logger.debug('Built basic intelligent transcription request', { - skill: activeSkill, - programmingLanguage: programmingLanguage || 'not specified', - textLength: cleanText.length, - hasSystemInstruction: !!request.systemInstruction - }); - - return request; - } - - buildIntelligentTranscriptionRequestWithHistory(text, activeSkill, conversationHistory, skillContext, programmingLanguage) { - const request = { - contents: [] - }; - - this.applyGenerationDefaults(request); - - // For chat/transcription messages, DO NOT include the full skill prompt; use only the intelligent filter prompt - const intelligentPrompt = this.getIntelligentTranscriptionPrompt(activeSkill, programmingLanguage); - request.systemInstruction = { parts: [{ text: intelligentPrompt }] }; - - // Add 
recent conversation history (excluding system messages) with validation - const conversationContents = conversationHistory - .filter(event => { - // Filter out system messages and ensure content exists and is valid - return event.role !== 'system' && - event.content && - typeof event.content === 'string' && - event.content.trim().length > 0; - }) - .slice(-8) // Keep last 8 exchanges for context - .map(event => { - const content = event.content.trim(); - if (!content) { - logger.warn('Empty content found in conversation history', { event }); - return null; - } - return { - role: event.role === 'model' ? 'model' : 'user', - parts: [{ text: content }] - }; - }) - .filter(content => content !== null); // Remove any null entries - - // Add the conversation history - request.contents.push(...conversationContents); - - // Validate and add the current transcription - const cleanText = text && typeof text === 'string' ? text.trim() : ''; - if (!cleanText) { - throw new Error('Empty or invalid transcription text provided'); - } - - request.contents.push({ - role: 'user', - parts: [{ text: cleanText }] - }); - - // Ensure we have at least one content item - if (request.contents.length === 0) { - throw new Error('No valid content to send to Gemini API'); - } - - logger.debug('Built intelligent transcription request with conversation history', { - skill: activeSkill, - programmingLanguage: programmingLanguage || 'not specified', - historyLength: conversationHistory.length, - totalContents: request.contents.length, - hasSkillPrompt: !!skillContext.skillPrompt, - cleanTextLength: cleanText.length, - requiresProgrammingLanguage: skillContext.requiresProgrammingLanguage || false - }); - - return request; - } - - getIntelligentTranscriptionPrompt(activeSkill, programmingLanguage) { - let prompt = `# Intelligent Transcription Response System - -Assume you are asked a question in ${activeSkill.toUpperCase()} mode. 
Your job is to intelligently respond to question/message with appropriate brevity. -Assume you are in an interview and you need to perform best in ${activeSkill.toUpperCase()} mode. -Always respond to the point, do not repeat the question or unnecessary information which is not related to ${activeSkill}.`; - - // Add programming language context if provided - if (programmingLanguage) { - const lang = String(programmingLanguage).toLowerCase(); - const languageMap = { cpp: 'C++', c: 'C', python: 'Python', java: 'Java', javascript: 'JavaScript', js: 'JavaScript' }; - const fenceTagMap = { cpp: 'cpp', c: 'c', python: 'python', java: 'java', javascript: 'javascript', js: 'javascript' }; - const languageTitle = languageMap[lang] || (lang.charAt(0).toUpperCase() + lang.slice(1)); - const fenceTag = fenceTagMap[lang] || lang || 'text'; - prompt += `\n\nCODING CONTEXT: Respond ONLY in ${languageTitle}. All code blocks must use triple backticks with language tag \`\`\`${fenceTag}\`\`\`. Do not include other languages unless explicitly asked.`; - } - - prompt += ` - -## Response Rules: - -### If the transcription is casual conversation, greetings, or NOT related to ${activeSkill}: -- Respond with: "Yeah, I'm listening. Ask your question relevant to ${activeSkill}." -- Or similar brief acknowledgments like: "I'm here, what's your ${activeSkill} question?" - -### If the transcription IS relevant to ${activeSkill} or is a follow-up question: -- Provide a comprehensive, detailed response -- Use bullet points, examples, and explanations -- Focus on actionable insights and complete answers -- Do not truncate or shorten your response - -### Examples of casual/irrelevant messages: -- "Hello", "Hi there", "How are you?" -- "What's the weather like?" 
-- "I'm just testing this" -- Random conversations not related to ${activeSkill} - -### Examples of relevant messages: -- Actual questions about ${activeSkill} concepts -- Follow-up questions to previous responses -- Requests for clarification on ${activeSkill} topics -- Problem-solving requests related to ${activeSkill} - -## Response Format: -- Keep responses detailed -- Use bullet points for structured answers -- Be encouraging and helpful -- Stay focused on ${activeSkill} - -If the user's input is a coding or DSA problem statement and contains no code, produce a complete, runnable solution in the selected programming language without asking for more details. Always include the final implementation in a properly tagged code block. - -Remember: Be intelligent about filtering - only provide detailed responses when the user actually needs help with ${activeSkill}.`; - - return prompt; - } - - formatUserMessage(text, activeSkill) { - return `Context: ${activeSkill.toUpperCase()} analysis request\n\nText to analyze:\n${text}`; - } - - async executeRequest(geminiRequest) { - const maxRetries = config.get('llm.gemini.maxRetries'); - const timeout = config.get('llm.gemini.timeout'); - - // Add request debugging - logger.debug('Executing Gemini request', { - hasModel: !!this.model, - hasClient: !!this.client, - requestKeys: Object.keys(geminiRequest), - timeout, - maxRetries, - nodeVersion: process.version, - platform: process.platform - }); - - for (let attempt = 1; attempt <= maxRetries; attempt++) { - try { - // Pre-flight check - await this.performPreflightCheck(); - - const timeoutPromise = new Promise((_, reject) => - setTimeout(() => reject(new Error('Request timeout')), timeout) - ); - - logger.debug(`Gemini API attempt ${attempt} starting`, { - timestamp: new Date().toISOString(), - timeout - }); - - const requestPromise = this.model.generateContent(geminiRequest); - const result = await Promise.race([requestPromise, timeoutPromise]); - - if (!result.response) { 
- throw new Error('Empty response from Gemini API'); - } - - const { text, finishReason } = this.extractTextFromCandidates(result.response); - - if (finishReason === 'MAX_TOKENS') { - logger.warn('Gemini primary response reached max tokens limit', { - attempt, - finishReason - }); - } - - logger.debug('Gemini API request successful', { - attempt, - responseLength: text.length, - finishReason - }); - - return text; - } catch (error) { - const errorInfo = this.analyzeError(error); - - // Enhanced error logging for fetch failures - if (errorInfo.type === 'NETWORK_ERROR') { - logger.error('Network error details', { - attempt, - errorMessage: error.message, - errorStack: error.stack, - errorName: error.name, - nodeEnv: process.env.NODE_ENV, - electronVersion: process.versions.electron, - chromeVersion: process.versions.chrome, - nodeVersion: process.versions.node, - userAgent: this.getUserAgent() - }); - } - - logger.warn(`Gemini API attempt ${attempt} failed`, { - error: error.message, - errorType: errorInfo.type, - isNetworkError: errorInfo.isNetworkError, - suggestedAction: errorInfo.suggestedAction, - remainingAttempts: maxRetries - attempt - }); - - if (attempt === maxRetries) { - const finalError = new Error(`Gemini API failed after ${maxRetries} attempts: ${error.message}`); - finalError.errorAnalysis = errorInfo; - finalError.originalError = error; - throw finalError; - } - - // Use exponential backoff with jitter for network errors - const baseDelay = errorInfo.isNetworkError ? 
2500 : 1500; - const delay = baseDelay * attempt + Math.random() * 1000; - - logger.debug(`Waiting ${delay}ms before retry ${attempt + 1}`, { - baseDelay, - isNetworkError: errorInfo.isNetworkError - }); - - await this.delay(delay); - } - } - } - - async performPreflightCheck() { - // Quick connectivity check - try { - const startTime = Date.now(); - await this.testNetworkConnection({ - host: 'generativelanguage.googleapis.com', - port: 443, - name: 'Gemini API Endpoint' - }); - const latency = Date.now() - startTime; - - logger.debug('Preflight check passed', { latency }); - } catch (error) { - logger.warn('Preflight check failed', { - error: error.message, - suggestion: 'Network connectivity issue detected before API call' - }); - // Don't throw here - let the actual API call fail with more detail - } - } - - getUserAgent() { - try { - // Try to get user agent from Electron if available - if (typeof navigator !== 'undefined' && navigator.userAgent) { - return navigator.userAgent; - } - return `Node.js/${process.version} (${process.platform}; ${process.arch})`; - } catch { - return 'Unknown'; - } - } - - analyzeError(error) { - const errorMessage = error.message.toLowerCase(); - - // Network connectivity errors - if (errorMessage.includes('fetch failed') || - errorMessage.includes('network error') || - errorMessage.includes('enotfound') || - errorMessage.includes('econnrefused') || - errorMessage.includes('timeout')) { - return { - type: 'NETWORK_ERROR', - isNetworkError: true, - suggestedAction: 'Check internet connection and firewall settings' - }; - } - - // API key errors - if (errorMessage.includes('unauthorized') || - errorMessage.includes('invalid api key') || - errorMessage.includes('forbidden')) { - return { - type: 'AUTH_ERROR', - isNetworkError: false, - suggestedAction: 'Verify Gemini API key configuration' - }; - } - - // Rate limiting - if (errorMessage.includes('quota') || - errorMessage.includes('rate limit') || - errorMessage.includes('too many 
requests')) { - return { - type: 'RATE_LIMIT_ERROR', - isNetworkError: false, - suggestedAction: 'Wait before retrying or check API quota' - }; - } - - // Timeout errors - if (errorMessage.includes('request timeout') || errorMessage.includes('etimedout')) { - return { - type: 'TIMEOUT_ERROR', - isNetworkError: true, - suggestedAction: 'Check network latency or increase timeout' - }; - } - - return { - type: 'UNKNOWN_ERROR', - isNetworkError: false, - suggestedAction: 'Check logs for more details' - }; - } - - async checkNetworkConnectivity() { - const connectivityTests = [ - { host: 'google.com', port: 443, name: 'Google (HTTPS)' }, - { host: 'generativelanguage.googleapis.com', port: 443, name: 'Gemini API Endpoint' } - ]; - - const results = await Promise.allSettled( - connectivityTests.map(test => this.testNetworkConnection(test)) - ); - - const connectivity = { - timestamp: new Date().toISOString(), - tests: results.map((result, index) => ({ - ...connectivityTests[index], - success: result.status === 'fulfilled' && result.value, - error: result.status === 'rejected' ? 
result.reason.message : null - })) - }; - - logger.info('Network connectivity check completed', connectivity); - return connectivity; - } - - async testNetworkConnection({ host, port, name }) { - return new Promise((resolve, reject) => { - const net = require('net'); - const socket = new net.Socket(); - - const timeout = setTimeout(() => { - socket.destroy(); - reject(new Error(`Connection timeout to ${host}:${port}`)); - }, 5000); - - socket.on('connect', () => { - clearTimeout(timeout); - socket.destroy(); - resolve(true); - }); - - socket.on('error', (error) => { - clearTimeout(timeout); - reject(new Error(`Connection failed to ${host}:${port}: ${error.message}`)); - }); - - socket.connect(port, host); - }); - } - - generateFallbackResponse(text, activeSkill) { - logger.info('Generating fallback response', { activeSkill }); - - const fallbackResponses = { - 'dsa': 'This appears to be a data structures and algorithms problem. Consider breaking it down into smaller components and identifying the appropriate algorithm or data structure to use.', - 'system-design': 'For this system design question, consider scalability, reliability, and the trade-offs between different architectural approaches.', - 'programming': 'This looks like a programming challenge. Focus on understanding the requirements, edge cases, and optimal time/space complexity.', - 'default': 'I can help analyze this content. Please ensure your Gemini API key is properly configured for detailed analysis.' 
- }; - - const response = fallbackResponses[activeSkill] || fallbackResponses.default; - - return { - response, - metadata: { - skill: activeSkill, - processingTime: 0, - requestId: this.requestCount, - usedFallback: true - } - }; - } - - generateIntelligentFallbackResponse(text, activeSkill) { - logger.info('Generating intelligent fallback response for transcription', { activeSkill }); - - // Simple heuristic to determine if message seems skill-related - const skillKeywords = { - 'dsa': ['algorithm', 'data structure', 'array', 'tree', 'graph', 'sort', 'search', 'complexity', 'big o'], - 'programming': ['code', 'function', 'variable', 'class', 'method', 'bug', 'debug', 'syntax'], - 'system-design': ['scalability', 'database', 'architecture', 'microservice', 'load balancer', 'cache'], - 'behavioral': ['interview', 'experience', 'situation', 'leadership', 'conflict', 'team'], - 'sales': ['customer', 'deal', 'negotiation', 'price', 'revenue', 'prospect'], - 'presentation': ['slide', 'audience', 'public speaking', 'presentation', 'nervous'], - 'data-science': ['data', 'model', 'machine learning', 'statistics', 'analytics', 'python', 'pandas'], - 'devops': ['deployment', 'ci/cd', 'docker', 'kubernetes', 'infrastructure', 'monitoring'], - 'negotiation': ['negotiate', 'compromise', 'agreement', 'terms', 'conflict resolution'] - }; - - const textLower = text.toLowerCase(); - const relevantKeywords = skillKeywords[activeSkill] || []; - const hasRelevantKeywords = relevantKeywords.some(keyword => textLower.includes(keyword)); - - // Check for question indicators - const questionIndicators = ['how', 'what', 'why', 'when', 'where', 'can you', 'could you', 'should i', '?']; - const seemsLikeQuestion = questionIndicators.some(indicator => textLower.includes(indicator)); - - let response; - if (hasRelevantKeywords || seemsLikeQuestion) { - response = `I'm having trouble processing that right now, but it sounds like a ${activeSkill} question. 
Could you rephrase or ask more specifically about what you need help with?`; - } else { - response = `Yeah, I'm listening. Ask your question relevant to ${activeSkill}.`; - } - - return { - response, - metadata: { - skill: activeSkill, - processingTime: 0, - requestId: this.requestCount, - usedFallback: true, - isTranscriptionResponse: true - } - }; - } - - async testConnection() { - if (!this.isInitialized) { - return { success: false, error: 'Service not initialized' }; - } - - try { - // First check network connectivity - const networkCheck = await this.checkNetworkConnectivity(); - const hasNetworkIssues = networkCheck.tests.some(test => !test.success); - - if (hasNetworkIssues) { - logger.warn('Network connectivity issues detected', networkCheck); - } - - const testRequest = { - contents: [{ - role: 'user', - parts: [{ text: 'Test connection. Please respond with "OK".' }] - }] - }; - - this.applyGenerationDefaults(testRequest, { temperature: 0, maxOutputTokens: 10 }); - - const startTime = Date.now(); - const result = await this.model.generateContent(testRequest); - const latency = Date.now() - startTime; - const { text } = this.extractTextFromCandidates(result.response); - - logger.info('Connection test successful', { - response: text, - latency, - networkCheck: hasNetworkIssues ? 
'issues_detected' : 'healthy' - }); - - return { - success: true, - response: text, - latency, - networkConnectivity: networkCheck - }; - } catch (error) { - const errorAnalysis = this.analyzeError(error); - logger.error('Connection test failed', { - error: error.message, - errorAnalysis - }); - - return { - success: false, - error: error.message, - errorAnalysis, - networkConnectivity: await this.checkNetworkConnectivity().catch(() => null) - }; - } - } - - updateApiKey(newApiKey) { - process.env.GEMINI_API_KEY = newApiKey; - this.isInitialized = false; - this.initializeClient(); - - logger.info('API key updated and client reinitialized'); - } - - getStats() { - return { - isInitialized: this.isInitialized, - requestCount: this.requestCount, - errorCount: this.errorCount, - successRate: this.requestCount > 0 ? ((this.requestCount - this.errorCount) / this.requestCount) * 100 : 0, - config: config.get('llm.gemini') - }; - } - - delay(ms) { - return new Promise(resolve => setTimeout(resolve, ms)); - } - - async executeAlternativeRequest(geminiRequest) { - const https = require('https'); - const apiKey = config.getApiKey('GEMINI'); - const model = config.get('llm.gemini.model'); - - logger.info('Using alternative HTTPS request method'); - - const url = `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent`; - - const postData = JSON.stringify(geminiRequest); - - const agent = new https.Agent({ keepAlive: true, maxSockets: 1 }); - - const options = { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'x-goog-api-key': apiKey, - 'Content-Length': Buffer.byteLength(postData), - 'User-Agent': this.getUserAgent() - }, - timeout: config.get('llm.gemini.timeout'), - agent - }; - - return new Promise((resolve, reject) => { - const req = https.request(url, options, (res) => { - let data = ''; - - res.on('data', (chunk) => { - data += chunk; - }); - - res.on('end', () => { - try { - if (res.statusCode !== 200) { - reject(new 
Error(`HTTP ${res.statusCode}: ${data}`)); - return; - } - - const response = JSON.parse(data); - - logger.debug('Alternative request response structure', { - hasResponse: !!response, - hasCandidates: !!response.candidates, - candidatesLength: response.candidates?.length, - responseKeys: Object.keys(response || {}), - firstCandidateKeys: response.candidates?.[0] ? Object.keys(response.candidates[0]) : [] - }); - - const { text, finishReason } = this.extractTextFromCandidates(response); +function getProviderName() { + return (process.env.LLM_PROVIDER || config.get('llm.provider') || 'gemini').toLowerCase(); +} - if (finishReason === 'MAX_TOKENS') { - logger.warn('Gemini alternative response reached max tokens limit', { - finishReason - }); - } - - logger.info('Alternative request successful', { - responseLength: text.length, - statusCode: res.statusCode, - finishReason - }); - - resolve(text.trim()); - } catch (parseError) { - logger.error('Failed to parse alternative response', { - error: parseError.message, - rawResponse: data.substring(0, 500), - statusCode: res.statusCode - }); - reject(new Error(`Failed to parse response: ${parseError.message}`)); - } - }); - }); - - req.on('error', (error) => { - reject(new Error(`Alternative request failed: ${error.message}`)); - }); - - req.on('timeout', () => { - req.destroy(); - reject(new Error('Alternative request timeout')); - }); - - req.write(postData); - req.end(); - }); +function getService() { + const provider = getProviderName(); + if (provider === 'ollama') { + return require('./ollama.service'); } + return require('./gemini.service'); } -module.exports = new LLMService(); \ No newline at end of file +// Dynamic proxy — all property accesses are forwarded to the active provider service. +// Switching LLM_PROVIDER at runtime (or via config.set) is picked up automatically. 
+module.exports = new Proxy( + { _getProviderName: getProviderName }, + { + get(target, prop) { + if (prop === '_getProviderName') return target._getProviderName; + const svc = getService(); + const val = svc[prop]; + return typeof val === 'function' ? val.bind(svc) : val; + }, + set(_, prop, value) { + getService()[prop] = value; + return true; + } + } +); diff --git a/src/services/ollama.service.js b/src/services/ollama.service.js new file mode 100644 index 00000000..0aaf73d2 --- /dev/null +++ b/src/services/ollama.service.js @@ -0,0 +1,533 @@ +const http = require('http'); +const https = require('https'); +const logger = require('../core/logger').createServiceLogger('OLLAMA'); +const config = require('../core/config'); + +class OllamaService { + constructor() { + this.isInitialized = false; + this.requestCount = 0; + this.errorCount = 0; + this.initializeClient(); + } + + initializeClient() { + // Ollama requires no API key — just mark ready and verify on first use + this.isInitialized = true; + logger.info('Ollama service ready', { + baseUrl: this.getBaseUrl(), + model: this.getModel(), + visionModel: this.getVisionModel() + }); + } + + getBaseUrl() { + return process.env.OLLAMA_BASE_URL || config.get('llm.ollama.baseUrl') || 'http://localhost:11434'; + } + + getModel() { + return process.env.OLLAMA_MODEL || config.get('llm.ollama.model') || 'llama3.2'; + } + + getVisionModel() { + return process.env.OLLAMA_VISION_MODEL || config.get('llm.ollama.visionModel') || 'llava'; + } + + getTimeout() { + return config.get('llm.ollama.timeout') || 120000; + } + + // ── Public interface (matches GeminiService) ───────────────────────────── + + async processImageWithSkill(imageBuffer, mimeType, activeSkill, sessionMemory = [], programmingLanguage = null) { + if (!imageBuffer || !Buffer.isBuffer(imageBuffer)) { + throw new Error('Invalid image buffer provided to processImageWithSkill'); + } + + const startTime = Date.now(); + this.requestCount++; + + try { + const { 
promptLoader } = require('../../prompt-loader'); + const skillPrompt = promptLoader.getSkillPrompt(activeSkill, programmingLanguage) || ''; + + const base64 = imageBuffer.toString('base64'); + const instruction = this.formatImageInstruction(activeSkill, programmingLanguage); + + const messages = []; + if (skillPrompt) { + messages.push({ role: 'system', content: skillPrompt }); + } + messages.push({ + role: 'user', + content: instruction, + images: [base64] + }); + + const responseText = await this.callOllama(messages, { model: this.getVisionModel() }); + + const finalResponse = programmingLanguage + ? this.enforceProgrammingLanguage(responseText, programmingLanguage) + : responseText; + + logger.info('Ollama image processing complete', { + activeSkill, + imageSize: imageBuffer.length, + responseLength: finalResponse.length, + processingTime: Date.now() - startTime + }); + + return { + response: finalResponse, + metadata: { + skill: activeSkill, + programmingLanguage, + processingTime: Date.now() - startTime, + requestId: this.requestCount, + usedFallback: false, + isImageAnalysis: true, + mimeType, + provider: 'ollama', + model: this.getVisionModel() + } + }; + } catch (error) { + this.errorCount++; + logger.error('Ollama image processing failed', { error: error.message, activeSkill }); + if (config.get('llm.ollama.fallbackEnabled')) { + return this.generateFallbackResponse('[image]', activeSkill); + } + throw error; + } + } + + async processTextWithSkill(text, activeSkill, sessionMemory = [], programmingLanguage = null) { + const startTime = Date.now(); + this.requestCount++; + + try { + logger.info('Ollama: processing text', { + activeSkill, + textLength: text.length, + programmingLanguage: programmingLanguage || 'not specified' + }); + + const messages = this.buildChatMessages(text, activeSkill, sessionMemory, programmingLanguage); + const responseText = await this.callOllama(messages); + + const finalResponse = programmingLanguage + ? 
this.enforceProgrammingLanguage(responseText, programmingLanguage) + : responseText; + + return { + response: finalResponse, + metadata: { + skill: activeSkill, + programmingLanguage, + processingTime: Date.now() - startTime, + requestId: this.requestCount, + usedFallback: false, + provider: 'ollama', + model: this.getModel() + } + }; + } catch (error) { + this.errorCount++; + logger.error('Ollama text processing failed', { error: error.message, activeSkill }); + if (config.get('llm.ollama.fallbackEnabled')) { + return this.generateFallbackResponse(text, activeSkill); + } + throw error; + } + } + + async processTranscriptionWithIntelligentResponse(text, activeSkill, sessionMemory = [], programmingLanguage = null) { + const cleanText = text && typeof text === 'string' ? text.trim() : ''; + if (!cleanText) { + throw new Error('Empty transcription text'); + } + + const startTime = Date.now(); + this.requestCount++; + + try { + logger.info('Ollama: processing transcription', { + activeSkill, + textLength: cleanText.length, + programmingLanguage: programmingLanguage || 'not specified' + }); + + const messages = this.buildTranscriptionMessages(cleanText, activeSkill, sessionMemory, programmingLanguage); + const responseText = await this.callOllama(messages); + + const finalResponse = programmingLanguage + ? 
this.enforceProgrammingLanguage(responseText, programmingLanguage) + : responseText; + + return { + response: finalResponse, + metadata: { + skill: activeSkill, + programmingLanguage, + processingTime: Date.now() - startTime, + requestId: this.requestCount, + usedFallback: false, + isTranscriptionResponse: true, + provider: 'ollama', + model: this.getModel() + } + }; + } catch (error) { + this.errorCount++; + logger.error('Ollama transcription processing failed', { error: error.message, activeSkill }); + if (config.get('llm.ollama.fallbackEnabled')) { + return this.generateIntelligentFallbackResponse(cleanText, activeSkill); + } + throw error; + } + } + + async testConnection() { + try { + const baseUrl = this.getBaseUrl(); + const parsed = new URL(baseUrl); + + // Hit /api/tags — lightweight endpoint that lists available models + const responseText = await this.httpGet(`${baseUrl}/api/tags`); + const data = JSON.parse(responseText); + const models = (data.models || []).map(m => m.name); + + logger.info('Ollama connection test successful', { baseUrl, models }); + return { success: true, baseUrl, models, provider: 'ollama' }; + } catch (error) { + logger.error('Ollama connection test failed', { error: error.message }); + return { success: false, error: error.message, provider: 'ollama' }; + } + } + + async checkNetworkConnectivity() { + const baseUrl = this.getBaseUrl(); + let parsed; + try { + parsed = new URL(baseUrl); + } catch { + parsed = { hostname: 'localhost', port: 11434 }; + } + + const host = parsed.hostname || 'localhost'; + const port = parseInt(parsed.port) || 11434; + + const tests = [{ host, port, name: `Ollama server (${host}:${port})` }]; + const results = await Promise.allSettled(tests.map(t => this.testTcpConnection(t))); + + return { + timestamp: new Date().toISOString(), + tests: results.map((result, i) => ({ + ...tests[i], + success: result.status === 'fulfilled' && result.value, + error: result.status === 'rejected' ? 
result.reason.message : null + })) + }; + } + + testTcpConnection({ host, port }) { + return new Promise((resolve, reject) => { + const net = require('net'); + const socket = new net.Socket(); + + const timeout = setTimeout(() => { + socket.destroy(); + reject(new Error(`Connection timeout to ${host}:${port}`)); + }, 5000); + + socket.on('connect', () => { clearTimeout(timeout); socket.destroy(); resolve(true); }); + socket.on('error', err => { clearTimeout(timeout); reject(new Error(`Connection failed to ${host}:${port}: ${err.message}`)); }); + socket.connect(port, host); + }); + } + + getStats() { + return { + isInitialized: this.isInitialized, + requestCount: this.requestCount, + errorCount: this.errorCount, + successRate: this.requestCount > 0 ? ((this.requestCount - this.errorCount) / this.requestCount) * 100 : 0, + baseUrl: this.getBaseUrl(), + model: this.getModel(), + visionModel: this.getVisionModel(), + provider: 'ollama' + }; + } + + // updateApiKey is a no-op for Ollama (no API key required) + updateApiKey(_key) { + logger.info('Ollama does not use an API key — skipping updateApiKey'); + } + + // ── Message builders ────────────────────────────────────────────────────── + + buildChatMessages(text, activeSkill, sessionMemory, programmingLanguage) { + const { promptLoader } = require('../../prompt-loader'); + const skillPrompt = promptLoader.getSkillPrompt(activeSkill, programmingLanguage); + + const messages = []; + + if (skillPrompt) { + messages.push({ role: 'system', content: skillPrompt }); + } + + // Add conversation history from session manager + try { + const sessionManager = require('../managers/session.manager'); + if (sessionManager && typeof sessionManager.getConversationHistory === 'function') { + const history = sessionManager.getConversationHistory(15); + for (const event of history) { + if (event.role === 'system') continue; + if (!event.content || !event.content.trim()) continue; + messages.push({ + role: event.role === 'model' ? 
'assistant' : 'user', + content: event.content.trim() + }); + } + } + } catch (_) { /* session manager unavailable */ } + + messages.push({ + role: 'user', + content: `Context: ${activeSkill.toUpperCase()} analysis request\n\nText to analyze:\n${text}` + }); + + return messages; + } + + buildTranscriptionMessages(text, activeSkill, sessionMemory, programmingLanguage) { + const systemPrompt = this.getIntelligentTranscriptionPrompt(activeSkill, programmingLanguage); + const messages = [{ role: 'system', content: systemPrompt }]; + + try { + const sessionManager = require('../managers/session.manager'); + if (sessionManager && typeof sessionManager.getConversationHistory === 'function') { + const history = sessionManager.getConversationHistory(10); + const recent = history.filter(e => e.role !== 'system' && e.content && e.content.trim()).slice(-8); + for (const event of recent) { + messages.push({ + role: event.role === 'model' ? 'assistant' : 'user', + content: event.content.trim() + }); + } + } + } catch (_) { /* session manager unavailable */ } + + messages.push({ role: 'user', content: text }); + return messages; + } + + getIntelligentTranscriptionPrompt(activeSkill, programmingLanguage) { + let prompt = `# Intelligent Transcription Response System + +Assume you are asked a question in ${activeSkill.toUpperCase()} mode. Your job is to intelligently respond to question/message with appropriate brevity. +Assume you are in an interview and you need to perform best in ${activeSkill.toUpperCase()} mode. 
+Always respond to the point, do not repeat the question or unnecessary information which is not related to ${activeSkill}.`; + + if (programmingLanguage) { + const lang = String(programmingLanguage).toLowerCase(); + const languageMap = { cpp: 'C++', c: 'C', python: 'Python', java: 'Java', javascript: 'JavaScript', js: 'JavaScript' }; + const fenceTagMap = { cpp: 'cpp', c: 'c', python: 'python', java: 'java', javascript: 'javascript', js: 'javascript' }; + const languageTitle = languageMap[lang] || (lang.charAt(0).toUpperCase() + lang.slice(1)); + const fenceTag = fenceTagMap[lang] || lang || 'text'; + prompt += `\n\nCODING CONTEXT: Respond ONLY in ${languageTitle}. All code blocks must use triple backticks with language tag \`\`\`${fenceTag}\`\`\`. Do not include other languages unless explicitly asked.`; + } + + prompt += ` + +## Response Rules: + +### If the transcription is casual conversation, greetings, or NOT related to ${activeSkill}: +- Respond with: "Yeah, I'm listening. Ask your question relevant to ${activeSkill}." + +### If the transcription IS relevant to ${activeSkill} or is a follow-up question: +- Provide a comprehensive, detailed response +- Use bullet points, examples, and explanations +- Focus on actionable insights and complete answers + +If the user's input is a coding or DSA problem statement and contains no code, produce a complete, runnable solution in the selected programming language without asking for more details. Always include the final implementation in a properly tagged code block. + +Remember: Be intelligent about filtering - only provide detailed responses when the user actually needs help with ${activeSkill}.`; + + return prompt; + } + + formatImageInstruction(activeSkill, programmingLanguage) { + const langNote = programmingLanguage ? ` Use only ${programmingLanguage.toUpperCase()} for any code.` : ''; + return `Analyze this image for a ${activeSkill.toUpperCase()} question. 
Extract the problem concisely and provide the best possible solution with explanation and final code.${langNote}`; + } + + enforceProgrammingLanguage(text, programmingLanguage) { + try { + if (!text || !programmingLanguage) return text; + const norm = String(programmingLanguage).toLowerCase(); + const fenceTagMap = { cpp: 'cpp', c: 'c', python: 'python', java: 'java', javascript: 'javascript', js: 'javascript' }; + const fenceTag = fenceTagMap[norm] || norm || 'text'; + + const replaced = text.replace(/```([^\n]*)\n/g, (match, info) => { + const current = (info || '').trim(); + if (current.split(/\s+/)[0].toLowerCase() === fenceTag) return match; + return '```' + fenceTag + '\n'; + }); + return replaced.replace(/~~~([^\n]*)\n/g, () => '```' + fenceTag + '\n'); + } catch (_) { + return text; + } + } + + // ── HTTP helpers ────────────────────────────────────────────────────────── + + async callOllama(messages, opts = {}) { + const baseUrl = this.getBaseUrl(); + const model = opts.model || this.getModel(); + const timeout = this.getTimeout(); + + const body = { + model, + messages, + stream: false, + options: { + temperature: config.get('llm.ollama.generation.temperature') ?? 0.7, + num_predict: config.get('llm.ollama.generation.maxOutputTokens') ?? 4096 + } + }; + + const postData = JSON.stringify(body); + const parsed = new URL(`${baseUrl}/api/chat`); + const isHttps = parsed.protocol === 'https:'; + const port = parsed.port || (isHttps ? 443 : 11434); + + logger.debug('Calling Ollama', { model, messagesCount: messages.length, baseUrl }); + + return new Promise((resolve, reject) => { + const lib = isHttps ? 
https : http; + const options = { + hostname: parsed.hostname, + port, + path: parsed.pathname, + method: 'POST', + headers: { + 'Content-Type': 'application/json', + 'Content-Length': Buffer.byteLength(postData) + } + }; + + const timer = setTimeout(() => { + req.destroy(); + reject(new Error(`Ollama request timed out after ${timeout}ms`)); + }, timeout); + + const req = lib.request(options, (res) => { + let data = ''; + res.on('data', chunk => { data += chunk; }); + res.on('end', () => { + clearTimeout(timer); + try { + if (res.statusCode !== 200) { + reject(new Error(`Ollama HTTP ${res.statusCode}: ${data.slice(0, 300)}`)); + return; + } + const parsed = JSON.parse(data); + const content = parsed?.message?.content; + if (typeof content !== 'string' || !content.trim()) { + reject(new Error('Ollama returned empty content')); + return; + } + resolve(content.trim()); + } catch (e) { + reject(new Error(`Failed to parse Ollama response: ${e.message}`)); + } + }); + }); + + req.on('error', err => { clearTimeout(timer); reject(new Error(`Ollama connection error: ${err.message}`)); }); + req.write(postData); + req.end(); + }); + } + + httpGet(url) { + const parsed = new URL(url); + const isHttps = parsed.protocol === 'https:'; + const lib = isHttps ? https : http; + + return new Promise((resolve, reject) => { + const req = lib.get(url, (res) => { + let data = ''; + res.on('data', chunk => { data += chunk; }); + res.on('end', () => { + if (res.statusCode !== 200) { + reject(new Error(`HTTP ${res.statusCode}`)); + } else { + resolve(data); + } + }); + }); + req.on('error', reject); + req.setTimeout(5000, () => { req.destroy(); reject(new Error('Request timeout')); }); + }); + } + + // ── Fallback responses ──────────────────────────────────────────────────── + + generateFallbackResponse(text, activeSkill) { + const fallbackResponses = { + 'dsa': 'This appears to be a data structures and algorithms problem. 
Consider breaking it down into smaller components and identifying the appropriate algorithm or data structure to use.', + 'system-design': 'For this system design question, consider scalability, reliability, and the trade-offs between different architectural approaches.', + 'programming': 'This looks like a programming challenge. Focus on understanding the requirements, edge cases, and optimal time/space complexity.', + 'default': 'I can help analyze this content. Please ensure Ollama is running and a model is pulled (e.g., ollama pull llama3.2).' + }; + + return { + response: fallbackResponses[activeSkill] || fallbackResponses.default, + metadata: { + skill: activeSkill, + processingTime: 0, + requestId: this.requestCount, + usedFallback: true, + provider: 'ollama' + } + }; + } + + generateIntelligentFallbackResponse(text, activeSkill) { + const skillKeywords = { + 'dsa': ['algorithm', 'data structure', 'array', 'tree', 'graph', 'sort', 'search', 'complexity', 'big o'], + 'programming': ['code', 'function', 'variable', 'class', 'method', 'bug', 'debug', 'syntax'], + 'system-design': ['scalability', 'database', 'architecture', 'microservice', 'load balancer', 'cache'] + }; + + const textLower = text.toLowerCase(); + const relevantKeywords = skillKeywords[activeSkill] || []; + const hasRelevantKeywords = relevantKeywords.some(kw => textLower.includes(kw)); + const questionIndicators = ['how', 'what', 'why', 'when', 'where', 'can you', 'could you', '?']; + const seemsLikeQuestion = questionIndicators.some(i => textLower.includes(i)); + + const response = (hasRelevantKeywords || seemsLikeQuestion) + ? `I'm having trouble connecting to Ollama right now. Please ensure Ollama is running and a model is available (ollama pull ${this.getModel()}).` + : `Yeah, I'm listening. 
Ask your question relevant to ${activeSkill}.`; + + return { + response, + metadata: { + skill: activeSkill, + processingTime: 0, + requestId: this.requestCount, + usedFallback: true, + isTranscriptionResponse: true, + provider: 'ollama' + } + }; + } + + delay(ms) { + return new Promise(resolve => setTimeout(resolve, ms)); + } +} + +module.exports = new OllamaService(); diff --git a/src/ui/settings-window.js b/src/ui/settings-window.js index 2a7047ec..4a8a2739 100644 --- a/src/ui/settings-window.js +++ b/src/ui/settings-window.js @@ -14,6 +14,12 @@ document.addEventListener('DOMContentLoaded', () => { const whisperLanguageInput = document.getElementById('whisperLanguage'); const whisperSegmentMsInput = document.getElementById('whisperSegmentMs'); const geminiKeyInput = document.getElementById('geminiKey'); + const llmProviderSelect = document.getElementById('llmProvider'); + const geminiSection = document.getElementById('geminiSection'); + const ollamaSection = document.getElementById('ollamaSection'); + const ollamaBaseUrlInput = document.getElementById('ollamaBaseUrl'); + const ollamaModelInput = document.getElementById('ollamaModel'); + const ollamaVisionModelInput = document.getElementById('ollamaVisionModel'); const windowGapInput = document.getElementById('windowGap'); const codingLanguageSelect = document.getElementById('codingLanguage'); const activeSkillSelect = document.getElementById('activeSkill'); @@ -79,7 +85,13 @@ document.addEventListener('DOMContentLoaded', () => { if (settings.whisperLanguage && whisperLanguageInput) whisperLanguageInput.value = settings.whisperLanguage; if (settings.whisperSegmentMs && whisperSegmentMsInput) whisperSegmentMsInput.value = settings.whisperSegmentMs; if (settings.geminiKey && geminiKeyInput) geminiKeyInput.value = settings.geminiKey; + if (settings.llmProvider && llmProviderSelect) llmProviderSelect.value = settings.llmProvider; + if (settings.ollamaBaseUrl && ollamaBaseUrlInput) ollamaBaseUrlInput.value = 
settings.ollamaBaseUrl; + if (settings.ollamaModel && ollamaModelInput) ollamaModelInput.value = settings.ollamaModel; + if (settings.ollamaVisionModel && ollamaVisionModelInput) ollamaVisionModelInput.value = settings.ollamaVisionModel; if (settings.windowGap && windowGapInput) windowGapInput.value = settings.windowGap; + + updateLlmProviderVisibility(); // Set C++ as default if no coding language is specified if (codingLanguageSelect) { @@ -135,6 +147,10 @@ document.addEventListener('DOMContentLoaded', () => { if (whisperLanguageInput) settings.whisperLanguage = whisperLanguageInput.value; if (whisperSegmentMsInput) settings.whisperSegmentMs = whisperSegmentMsInput.value; if (geminiKeyInput) settings.geminiKey = geminiKeyInput.value; + if (llmProviderSelect) settings.llmProvider = llmProviderSelect.value; + if (ollamaBaseUrlInput) settings.ollamaBaseUrl = ollamaBaseUrlInput.value; + if (ollamaModelInput) settings.ollamaModel = ollamaModelInput.value; + if (ollamaVisionModelInput) settings.ollamaVisionModel = ollamaVisionModelInput.value; if (windowGapInput) settings.windowGap = windowGapInput.value; if (codingLanguageSelect) settings.codingLanguage = codingLanguageSelect.value; if (activeSkillSelect) settings.activeSkill = activeSkillSelect.value; @@ -142,6 +158,21 @@ document.addEventListener('DOMContentLoaded', () => { window.api.send('save-settings', settings); }; + const updateLlmProviderVisibility = () => { + const provider = llmProviderSelect ? llmProviderSelect.value : 'gemini'; + if (geminiSection) geminiSection.style.display = provider === 'gemini' ? '' : 'none'; + if (ollamaSection) ollamaSection.style.display = provider === 'ollama' ? '' : 'none'; + }; + + if (llmProviderSelect) { + llmProviderSelect.addEventListener('change', () => { + updateLlmProviderVisibility(); + saveSettings(); + }); + } + + updateLlmProviderVisibility(); + const updateSpeechFieldStates = () => { const provider = speechProviderSelect ? 
speechProviderSelect.value : 'azure'; const azureDisabled = provider !== 'azure'; @@ -166,6 +197,9 @@ document.addEventListener('DOMContentLoaded', () => { whisperLanguageInput, whisperSegmentMsInput, geminiKeyInput, + ollamaBaseUrlInput, + ollamaModelInput, + ollamaVisionModelInput, windowGapInput ];