From 07196b0d6f6979b8b3051a5ea207736151da1e42 Mon Sep 17 00:00:00 2001 From: Matthias Date: Tue, 2 Jun 2026 17:39:00 +0100 Subject: [PATCH 1/9] Fix Azure Speech worker crash, implement Groq model rotation for rate limits, optimize response brevity and ignore own speech --- .gitignore | 5 +- index.html | 4 + main.js | 176 +++-- package-lock.json | 215 +++++- package.json | 5 +- preload.js | 3 + speech-worker.js | 469 +++++++++++++ src/core/config.js | 13 +- src/managers/session.manager.js | 18 + src/services/llm.service.js | 892 +++++------------------- src/services/speech.service.js | 1158 +++++++------------------------ src/ui/main-window.js | 47 +- 12 files changed, 1291 insertions(+), 1714 deletions(-) create mode 100644 speech-worker.js diff --git a/.gitignore b/.gitignore index 65a6a549..e7271330 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,7 @@ node_modules/ .env eng.traineddata dist/ -.DS_Store \ No newline at end of file +.DS_Store +roots.sst +tmp_silence.wav +test-azure-speech.js \ No newline at end of file diff --git a/index.html b/index.html index 31e37f5d..62f950b7 100644 --- a/index.html +++ b/index.html @@ -362,6 +362,10 @@
+
+ +
+
DSA diff --git a/main.js b/main.js index 0d4019da..cc67c15d 100644 --- a/main.js +++ b/main.js @@ -1,6 +1,22 @@ require("dotenv").config(); -const { app, BrowserWindow, globalShortcut, session, ipcMain } = require("electron"); +const { app, BrowserWindow, globalShortcut, session, ipcMain, dialog } = require("electron"); +const fs = require('fs'); + +// Polyfills for pdf-parse in Electron's Main process +if (typeof global.DOMMatrix === 'undefined') { + global.DOMMatrix = class DOMMatrix {}; + global.ImageData = class ImageData {}; + global.Path2D = class Path2D {}; +} +const pdfParse = require('pdf-parse'); +const mammoth = require('mammoth'); +// Prefer system certificate store for TLS to avoid Chromium-only cert issues +try { + app.commandLine.appendSwitch('enable-features', 'UseSystemTrustStore'); +} catch (e) { + // If appendSwitch is not available yet, ignore — this is best-effort +} const logger = require("./src/core/logger").createServiceLogger("MAIN"); const config = require("./src/core/config"); @@ -113,16 +129,25 @@ class ApplicationController { if (details.url.includes('generativelanguage.googleapis.com')) { details.requestHeaders['User-Agent'] = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.6261.156 Safari/537.36'; } + // Ensure Azure Speech requests use a friendly User-Agent where needed + if (details.url.includes('.speech.microsoft.com') || details.url.includes('cognitiveservices.azure.com')) { + details.requestHeaders['User-Agent'] = details.requestHeaders['User-Agent'] || 'OpenCluely/1.0 (Electron)'; + } callback({ requestHeaders: details.requestHeaders }); }); - // Handle certificate errors for Google APIs + // Handle certificate verification for known trusted endpoints (best-effort) + // NOTE: Azure Speech endpoints no longer need Chromium-level trust bypass + // because the SDK now runs in a pure Node.js child process (speech-worker.js). 
ses.setCertificateVerifyProc((request, callback) => { + // Trust Google's generative language API (used for Gemini) if (request.hostname === 'generativelanguage.googleapis.com') { - callback(0); // Trust Google's certificates - } else { - callback(-2); // Use default verification + callback(0); // Trust + return; } + + // Otherwise, use default verification + callback(-2); }); logger.debug('Network configuration applied for Gemini API'); @@ -191,18 +216,33 @@ class ApplicationController { window.webContents.send("transcription-received", { text }); }); - // Automatically process transcription with LLM for intelligent response - setTimeout(async () => { - try { - const sessionHistory = sessionManager.getOptimizedHistory(); - await this.processTranscriptionWithLLM(text, sessionHistory); - } catch (error) { - logger.error("Failed to process transcription with LLM", { - error: error.message, - text: text.substring(0, 100) - }); + // Accumulate text for LLM processing to avoid rate limits + if (!this.accumulatedTranscription) { + this.accumulatedTranscription = ""; + } + this.accumulatedTranscription += " " + text; + + if (this.transcriptionDebounceTimeout) { + clearTimeout(this.transcriptionDebounceTimeout); + } + + // Process transcription with LLM only after 600ms of silence + this.transcriptionDebounceTimeout = setTimeout(async () => { + const fullText = this.accumulatedTranscription.trim(); + this.accumulatedTranscription = ""; // reset for next batch + + if (fullText.length > 0) { + try { + const sessionHistory = sessionManager.getOptimizedHistory(); + await this.processTranscriptionWithLLM(fullText, sessionHistory); + } catch (error) { + logger.error("Failed to process transcription with LLM", { + error: error.message, + text: fullText.substring(0, 100) + }); + } } - }, 500); + }, 600); }); speechService.on("interim-transcription", (text) => { @@ -235,6 +275,44 @@ class ApplicationController { ipcMain.handle("take-screenshot", () => this.triggerScreenshotOCR()); 
ipcMain.handle("list-displays", () => captureService.listDisplays()); ipcMain.handle("capture-area", (event, options) => captureService.captureAndProcess(options)); + + ipcMain.handle("upload-document", async () => { + try { + const result = await dialog.showOpenDialog({ + properties: ['openFile'], + filters: [ + { name: 'Documents', extensions: ['pdf', 'docx'] } + ] + }); + + if (result.canceled || result.filePaths.length === 0) { + return { canceled: true }; + } + + const filePath = result.filePaths[0]; + const extension = filePath.split('.').pop().toLowerCase(); + let textContent = ''; + + if (extension === 'pdf') { + const dataBuffer = fs.readFileSync(filePath); + const data = await pdfParse(dataBuffer); + textContent = data.text; + } else if (extension === 'docx') { + const docResult = await mammoth.extractRawText({ path: filePath }); + textContent = docResult.value; + } else { + throw new Error('Unsupported file format'); + } + + // Add the extracted text to the session context + sessionManager.setDocumentContext(textContent); + + return { success: true, length: textContent.length }; + } catch (error) { + logger.error('Failed to parse document', { error: error.message }); + return { success: false, error: error.message }; + } + }); // Provide reliable clipboard write via main process ipcMain.handle("copy-to-clipboard", (event, text) => { @@ -588,32 +666,40 @@ class ApplicationController { } toggleSpeechRecognition() { - const isAvailable = typeof speechService.isAvailable === 'function' ? 
speechService.isAvailable() : !!speechService.getStatus?.().isInitialized; - if (!isAvailable) { - logger.warn("Speech recognition unavailable; toggle ignored"); - try { - windowManager.broadcastToAllWindows("speech-status", { status: 'Speech recognition unavailable', available: false }); - windowManager.broadcastToAllWindows("speech-availability", { available: false }); - } catch (e) {} - return; - } - const currentStatus = speechService.getStatus(); - if (currentStatus.isRecording) { - try { - speechService.stopRecording(); - windowManager.hideChatWindow(); - logger.info("Speech recognition stopped via global shortcut"); - } catch (error) { - logger.error("Error stopping speech recognition:", error); + try { + const isAvailable = typeof speechService.isAvailable === 'function' ? speechService.isAvailable() : !!speechService.getStatus?.().isInitialized; + if (!isAvailable) { + logger.warn("Speech recognition unavailable; toggle ignored"); + try { + windowManager.broadcastToAllWindows("speech-status", { status: 'Speech recognition unavailable', available: false }); + windowManager.broadcastToAllWindows("speech-availability", { available: false }); + } catch (e) {} + return; } - } else { - try { - speechService.startRecording(); - windowManager.showChatWindow(); - logger.info("Speech recognition started via global shortcut"); - } catch (error) { - logger.error("Error starting speech recognition:", error); + const currentStatus = speechService.getStatus(); + if (currentStatus.isRecording) { + try { + speechService.stopRecording(); + if (typeof windowManager.hideChatWindow === 'function') { + windowManager.hideChatWindow(); + } + logger.info("Speech recognition stopped via global shortcut"); + } catch (error) { + logger.error("Error stopping speech recognition:", error); + } + } else { + try { + speechService.startRecording(); + if (typeof windowManager.showChatWindow === 'function') { + windowManager.showChatWindow(); + } + logger.info("Speech recognition started via 
global shortcut"); + } catch (error) { + logger.error("Error starting speech recognition:", error); + } } + } catch (outerError) { + logger.error("Critical error in toggleSpeechRecognition", { error: outerError.message, stack: outerError.stack }); } } @@ -1020,6 +1106,16 @@ class ApplicationController { onWillQuit() { globalShortcut.unregisterAll(); + + // Cleanly shut down the speech worker process + try { + if (typeof speechService.shutdown === 'function') { + speechService.shutdown(); + } + } catch (e) { + logger.error('Error shutting down speech worker', { error: e.message }); + } + windowManager.destroyAllWindows(); const sessionStats = sessionManager.getMemoryUsage(); diff --git a/package-lock.json b/package-lock.json index 80fa74fb..f6954471 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,15 +12,18 @@ "dependencies": { "@google/generative-ai": "^0.24.1", "dotenv": "^16.3.1", + "groq-sdk": "^1.2.1", + "mammoth": "^1.12.0", "markdown": "^0.5.0", "marked": "^15.0.12", "microsoft-cognitiveservices-speech-sdk": "^1.40.0", "node-record-lpcm16": "^1.0.1", + "pdf-parse": "^1.1.1", "winston": "^3.17.0", "winston-daily-rotate-file": "^4.7.1" }, "devDependencies": { - "electron": "^29.1.0", + "electron": "^29.4.6", "electron-builder": "^24.13.3" } }, @@ -696,7 +699,6 @@ "version": "0.8.10", "resolved": "https://registry.npmjs.org/@xmldom/xmldom/-/xmldom-0.8.10.tgz", "integrity": "sha512-2WALfTl4xo2SkGCYRt6rDTFfk9R1czmBvUQy12gK2KuRKIpWEhcbbzy8EZXtz/jkRqHX8bFEc6FC1HjX4TUWYw==", - "dev": true, "license": "MIT", "engines": { "node": ">=10.0.0" @@ -1053,7 +1055,6 @@ "version": "1.5.1", "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", - "dev": true, "funding": [ { "type": "github", @@ -1577,7 +1578,6 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz", "integrity": 
"sha512-3lqz5YjWTYnW6dlDa5TLaTCcShfar1e40rmcJVwCBJC6mWlFuj0eCHIElmG1g5kyuJ/GD+8Wn4FFCcz4gJPfaQ==", - "dev": true, "license": "MIT" }, "node_modules/crc": { @@ -1747,6 +1747,12 @@ "license": "MIT", "optional": true }, + "node_modules/dingbat-to-unicode": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/dingbat-to-unicode/-/dingbat-to-unicode-1.0.1.tgz", + "integrity": "sha512-98l0sW87ZT58pU4i61wa2OHwxbiYSbuxsCBozaVnYX2iCnr3bLM3fIes1/ej7h1YdOKuKt/MLs706TVnALA65w==", + "license": "BSD-2-Clause" + }, "node_modules/dir-compare": { "version": "3.3.0", "resolved": "https://registry.npmjs.org/dir-compare/-/dir-compare-3.3.0.tgz", @@ -1884,6 +1890,15 @@ "dev": true, "license": "BSD-2-Clause" }, + "node_modules/duck": { + "version": "0.1.12", + "resolved": "https://registry.npmjs.org/duck/-/duck-0.1.12.tgz", + "integrity": "sha512-wkctla1O6VfP89gQ+J/yDesM0S7B7XLXjKGzXxMDVFg7uEn706niAtyYovKbyq1oT9YwDcly721/iUWoc8MVRg==", + "license": "BSD", + "dependencies": { + "underscore": "^1.13.1" + } + }, "node_modules/dunder-proto": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", @@ -2631,6 +2646,15 @@ "dev": true, "license": "ISC" }, + "node_modules/groq-sdk": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/groq-sdk/-/groq-sdk-1.2.1.tgz", + "integrity": "sha512-dsDSWJRJf+n2dPiCv7zU3IsJbrh7jfSPqi6vc1q0TTK1oUF6bn+wv4P2VFdynkHpuJ0TTJ57vlpT87judPgVPA==", + "license": "Apache-2.0", + "bin": { + "groq-sdk": "bin/cli" + } + }, "node_modules/has-flag": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", @@ -2812,6 +2836,12 @@ ], "license": "BSD-3-Clause" }, + "node_modules/immediate": { + "version": "3.0.6", + "resolved": "https://registry.npmjs.org/immediate/-/immediate-3.0.6.tgz", + "integrity": "sha512-XXOFtyqDjNDAQxVfYxuF7g9Il/IbWmmlQg2MYKOH8ExIT1qg6xc4zyS3HaEEATgs1btfzxq15ciUiY7gjSXRGQ==", + "license": "MIT" + }, "node_modules/inflight": { "version": "1.0.6", 
"resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", @@ -2875,9 +2905,7 @@ "version": "1.0.0", "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", "integrity": "sha512-VLghIWNM6ELQzo7zwmcg0NmTVyWKYjvIeM83yjp0wRDTmUnrM678fQbcKBo6n2CJEF0szoG//ytg+TKla89ALQ==", - "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, "node_modules/isbinaryfile": { "version": "5.0.4", @@ -3016,6 +3044,48 @@ "graceful-fs": "^4.1.6" } }, + "node_modules/jszip": { + "version": "3.10.1", + "resolved": "https://registry.npmjs.org/jszip/-/jszip-3.10.1.tgz", + "integrity": "sha512-xXDvecyTpGLrqFrvkrUSoxxfJI5AH7U8zxxtVclpsUtMCq4JQ290LY8AW5c7Ggnr/Y/oK+bQMbqK2qmtk3pN4g==", + "license": "(MIT OR GPL-3.0-or-later)", + "dependencies": { + "lie": "~3.3.0", + "pako": "~1.0.2", + "readable-stream": "~2.3.6", + "setimmediate": "^1.0.5" + } + }, + "node_modules/jszip/node_modules/readable-stream": { + "version": "2.3.8", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.8.tgz", + "integrity": "sha512-8p0AUk4XODgIewSi0l8Epjs+EVnWiK7NoDIEGU0HhE7+ZyY8D1IMY7odu5lRrFXGg71L15KG8QrPmum45RTtdA==", + "license": "MIT", + "dependencies": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + }, + "node_modules/jszip/node_modules/safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==", + "license": "MIT" + }, + "node_modules/jszip/node_modules/string_decoder": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz", + "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==", + "license": "MIT", + 
"dependencies": { + "safe-buffer": "~5.1.0" + } + }, "node_modules/keyv": { "version": "4.5.4", "resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz", @@ -3089,6 +3159,15 @@ "safe-buffer": "~5.1.0" } }, + "node_modules/lie": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/lie/-/lie-3.3.0.tgz", + "integrity": "sha512-UaiMJzeWRlEujzAuw5LokY1L5ecNQYZKfmyZ9L7wDHb/p5etKaxXhohBcrw0EYby+G/NA52vRSN4N39dxHAIwQ==", + "license": "MIT", + "dependencies": { + "immediate": "~3.0.5" + } + }, "node_modules/lodash": { "version": "4.17.21", "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz", @@ -3153,6 +3232,17 @@ "node": ">= 12.0.0" } }, + "node_modules/lop": { + "version": "0.4.2", + "resolved": "https://registry.npmjs.org/lop/-/lop-0.4.2.tgz", + "integrity": "sha512-RefILVDQ4DKoRZsJ4Pj22TxE3omDO47yFpkIBoDKzkqPRISs5U1cnAdg/5583YPkWPaLIYHOKRMQSvjFsO26cw==", + "license": "BSD-2-Clause", + "dependencies": { + "duck": "^0.1.12", + "option": "~0.2.1", + "underscore": "^1.13.1" + } + }, "node_modules/lowercase-keys": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/lowercase-keys/-/lowercase-keys-2.0.0.tgz", @@ -3176,6 +3266,60 @@ "node": ">=10" } }, + "node_modules/mammoth": { + "version": "1.12.0", + "resolved": "https://registry.npmjs.org/mammoth/-/mammoth-1.12.0.tgz", + "integrity": "sha512-cwnK1RIcRdDMi2HRx2EXGYlxqIEh0Oo3bLhorgnsVJi2UkbX1+jKxuBNR9PC5+JaX7EkmJxFPmo6mjLpqShI2w==", + "license": "BSD-2-Clause", + "dependencies": { + "@xmldom/xmldom": "^0.8.6", + "argparse": "~1.0.3", + "base64-js": "^1.5.1", + "bluebird": "~3.4.0", + "dingbat-to-unicode": "^1.0.1", + "jszip": "^3.7.1", + "lop": "^0.4.2", + "path-is-absolute": "^1.0.0", + "underscore": "^1.13.1", + "xmlbuilder": "^10.0.0" + }, + "bin": { + "mammoth": "bin/mammoth" + }, + "engines": { + "node": ">=12.0.0" + } + }, + "node_modules/mammoth/node_modules/argparse": { + "version": "1.0.10", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz", + 
"integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==", + "license": "MIT", + "dependencies": { + "sprintf-js": "~1.0.2" + } + }, + "node_modules/mammoth/node_modules/bluebird": { + "version": "3.4.7", + "resolved": "https://registry.npmjs.org/bluebird/-/bluebird-3.4.7.tgz", + "integrity": "sha512-iD3898SR7sWVRHbiQv+sHUtHnMvC1o3nW5rAcqnq3uOn07DSAppZYUkIGslDz6gXC7HfunPe7YVBgoEJASPcHA==", + "license": "MIT" + }, + "node_modules/mammoth/node_modules/sprintf-js": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz", + "integrity": "sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==", + "license": "BSD-3-Clause" + }, + "node_modules/mammoth/node_modules/xmlbuilder": { + "version": "10.1.1", + "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-10.1.1.tgz", + "integrity": "sha512-OyzrcFLL/nb6fMGHbiRDuPup9ljBycsdCypwuyg5AAHvyWzGfChJpCXMG88AGTIMFhGZ9RccFN1e6lhg3hkwKg==", + "license": "MIT", + "engines": { + "node": ">=4.0" + } + }, "node_modules/markdown": { "version": "0.5.0", "resolved": "https://registry.npmjs.org/markdown/-/markdown-0.5.0.tgz", @@ -3417,6 +3561,12 @@ "license": "MIT", "optional": true }, + "node_modules/node-ensure": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/node-ensure/-/node-ensure-0.0.0.tgz", + "integrity": "sha512-DRI60hzo2oKN1ma0ckc6nQWlHU69RH6xN0sjQTjMpChPfTYvKZdcQFfdYK2RWbJcKyUizSIy/l8OTGxMAM1QDw==", + "license": "MIT" + }, "node_modules/node-record-lpcm16": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/node-record-lpcm16/-/node-record-lpcm16-1.0.1.tgz", @@ -3516,6 +3666,12 @@ "fn.name": "1.x.x" } }, + "node_modules/option": { + "version": "0.2.4", + "resolved": "https://registry.npmjs.org/option/-/option-0.2.4.tgz", + "integrity": "sha512-pkEqbDyl8ou5cpq+VsnQbe/WlEy5qS7xPzMS1U55OCG9KPvwFD46zDbxQIj3egJSFc3D+XhYOPUzz49zQAVy7A==", + "license": 
"BSD-2-Clause" + }, "node_modules/p-cancelable": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/p-cancelable/-/p-cancelable-2.1.1.tgz", @@ -3533,11 +3689,16 @@ "dev": true, "license": "BlueOak-1.0.0" }, + "node_modules/pako": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz", + "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==", + "license": "(MIT AND Zlib)" + }, "node_modules/path-is-absolute": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==", - "dev": true, "license": "MIT", "engines": { "node": ">=0.10.0" @@ -3577,6 +3738,28 @@ "dev": true, "license": "ISC" }, + "node_modules/pdf-parse": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/pdf-parse/-/pdf-parse-1.1.1.tgz", + "integrity": "sha512-v6ZJ/efsBpGrGGknjtq9J/oC8tZWq0KWL5vQrk2GlzLEQPUDB1ex+13Rmidl1neNN358Jn9EHZw5y07FFtaC7A==", + "license": "MIT", + "dependencies": { + "debug": "^3.1.0", + "node-ensure": "^0.0.0" + }, + "engines": { + "node": ">=6.8.1" + } + }, + "node_modules/pdf-parse/node_modules/debug": { + "version": "3.2.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-3.2.7.tgz", + "integrity": "sha512-CFjzYYAi4ThfiQvizrFQevTTXHtnCqWfe7x1AhgEscTz6ZbLbfoLRLPugTQyBth6f8ZERVUSyWHFD/7Wu4t1XQ==", + "license": "MIT", + "dependencies": { + "ms": "^2.1.1" + } + }, "node_modules/pend": { "version": "1.2.0", "resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz", @@ -3603,9 +3786,7 @@ "version": "2.0.1", "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz", "integrity": "sha512-3ouUOpQhtgrbOa17J7+uxOTpITYWaGP7/AhoR3+A+/1e9skrzelGi/dXzEYyvbxubEF6Wn2ypscTKiKJFFn1ag==", - "dev": true, - "license": "MIT", - "peer": true + "license": "MIT" }, 
"node_modules/progress": { "version": "2.0.3", @@ -3865,6 +4046,12 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/setimmediate": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/setimmediate/-/setimmediate-1.0.5.tgz", + "integrity": "sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA==", + "license": "MIT" + }, "node_modules/shebang-command": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", @@ -4263,6 +4450,12 @@ "node": ">=14.17" } }, + "node_modules/underscore": { + "version": "1.13.8", + "resolved": "https://registry.npmjs.org/underscore/-/underscore-1.13.8.tgz", + "integrity": "sha512-DXtD3ZtEQzc7M8m4cXotyHR+FAS18C64asBYY5vqZexfYryNNnDc02W4hKg3rdQuqOYas1jkseX0+nZXjTXnvQ==", + "license": "MIT" + }, "node_modules/undici-types": { "version": "6.21.0", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", diff --git a/package.json b/package.json index 3d056809..322d2c1f 100644 --- a/package.json +++ b/package.json @@ -32,15 +32,18 @@ "dependencies": { "@google/generative-ai": "^0.24.1", "dotenv": "^16.3.1", + "groq-sdk": "^1.2.1", + "mammoth": "^1.12.0", "markdown": "^0.5.0", "marked": "^15.0.12", "microsoft-cognitiveservices-speech-sdk": "^1.40.0", "node-record-lpcm16": "^1.0.1", + "pdf-parse": "^1.1.1", "winston": "^3.17.0", "winston-daily-rotate-file": "^4.7.1" }, "devDependencies": { - "electron": "^29.1.0", + "electron": "^29.4.6", "electron-builder": "^24.13.3" }, "build": { diff --git a/preload.js b/preload.js index ee51014b..d9240b4c 100644 --- a/preload.js +++ b/preload.js @@ -11,6 +11,9 @@ contextBridge.exposeInMainWorld('electronAPI', { stopSpeechRecognition: () => ipcRenderer.invoke('stop-speech-recognition'), getSpeechAvailability: () => ipcRenderer.invoke('get-speech-availability'), + // Document context + uploadDocument: () => ipcRenderer.invoke('upload-document'), + // Window 
management showAllWindows: () => ipcRenderer.invoke('show-all-windows'), hideAllWindows: () => ipcRenderer.invoke('hide-all-windows'), diff --git a/speech-worker.js b/speech-worker.js new file mode 100644 index 00000000..5df44efb --- /dev/null +++ b/speech-worker.js @@ -0,0 +1,469 @@ +/** + * speech-worker.js — Pure Node.js worker for Azure Speech SDK + * + * This file runs as a forked child process (child_process.fork) so that + * the Azure Speech SDK's native networking uses Node's TLS stack instead + * of Electron/Chromium's boringssl, which was causing CERTIFICATE_VERIFY_FAILED + * errors and crashing the app when Alt+R was pressed. + * + * Audio capture: Uses `child_process.spawn` to capture audio via `sox`. + * On Windows it uses `-t waveaudio default`, and on other platforms `-d`. + * The raw PCM stream is written into the Azure SDK's PushAudioInputStream. + * + * Communication with the main process is via IPC messages: + * Main → Worker: { type: 'start' | 'stop' | 'test' | 'status' | 'init', ... } + * Worker → Main: { type: 'recording-started' | 'recording-stopped' | 'transcription' + * | 'interim-transcription' | 'error' | 'status' | 'canceled' + * | 'session-started' | 'session-stopped' | 'init-result' + * | 'log', ... 
} + */ + +'use strict'; + +// ── Deps ──────────────────────────────────────────────────────────────── +const sdk = require('microsoft-cognitiveservices-speech-sdk'); +const { spawn } = require('child_process'); + +// ── State ─────────────────────────────────────────────────────────────── +let recognizer = null; +let pushStream = null; +let audioConfig = null; +let speechConfig = null; +let recordingProcess = null; +let isRecording = false; +let sessionStartTime = null; +let retryCount = 0; +const maxRetries = 3; +let _audioDataLogged = false; +let available = false; + +// ── Logging helper (sends to main process) ────────────────────────────── +function log(level, message, data) { + try { + process.send({ type: 'log', level, message, data: data || {} }); + } catch (_) { + // If IPC is broken, just write to stderr so we don't lose the info + process.stderr.write(`[speech-worker] ${level}: ${message} ${JSON.stringify(data || {})}\n`); + } +} + +// ── Initialisation ────────────────────────────────────────────────────── +function initialize(config) { + try { + const subscriptionKey = config.subscriptionKey; + const region = config.region; + + if (!subscriptionKey || !region) { + available = false; + process.send({ type: 'init-result', available: false, reason: 'Missing Azure Speech credentials' }); + return; + } + + speechConfig = sdk.SpeechConfig.fromSubscription(subscriptionKey, region); + + // Language & output format + const lang = (config.azure && config.azure.language) || 'en-US'; + speechConfig.speechRecognitionLanguage = lang; + speechConfig.outputFormat = sdk.OutputFormat.Detailed; + + // Timeouts + speechConfig.setProperty(sdk.PropertyId.SpeechServiceConnection_InitialSilenceTimeoutMs, '5000'); + speechConfig.setProperty(sdk.PropertyId.SpeechServiceConnection_EndSilenceTimeoutMs, '2000'); + speechConfig.setProperty(sdk.PropertyId.Speech_SegmentationSilenceTimeoutMs, '2000'); + + if (config.azure && config.azure.enableDictation) { + 
speechConfig.enableDictation(); + } + if (config.azure && config.azure.enableAudioLogging) { + speechConfig.enableAudioLogging(); + } + + available = true; + log('info', 'Azure Speech service initialized in worker', { region, language: lang }); + process.send({ type: 'init-result', available: true }); + } catch (error) { + available = false; + log('error', 'Failed to initialize Azure Speech client in worker', { error: error.message, stack: error.stack }); + process.send({ type: 'init-result', available: false, reason: error.message }); + } +} + +// ── Cleanup ───────────────────────────────────────────────────────────── +function cleanup() { + if (recognizer) { + try { recognizer.close(); } catch (_) {} + recognizer = null; + } + if (audioConfig) { + try { + if (typeof audioConfig.close === 'function') { + const r = audioConfig.close(); + if (r && typeof r.then === 'function') r.catch(() => {}); + } + } catch (_) {} + audioConfig = null; + } + if (recordingProcess) { + try { recordingProcess.kill(); } catch (_) {} + recordingProcess = null; + } + if (pushStream) { + try { + if (typeof pushStream.close === 'function') { + const r = pushStream.close(); + if (r && typeof r.then === 'function') r.catch(() => {}); + } + } catch (_) {} + pushStream = null; + } + _audioDataLogged = false; +} + +// ── Microphone capture ────────────────────────────────────────────────── +function startMicrophoneCapture() { + if (!pushStream) return; + + try { + const isWindows = process.platform === 'win32'; + const cmd = 'sox'; + let args = []; + + // sox format arguments: raw PCM, 16kHz, 16-bit, mono, signed integer + const formatArgs = ['-b', '16', '-e', 'signed', '-c', '1', '-r', '16000', '-t', 'raw', '-']; + + if (isWindows) { + // Windows needs waveaudio driver explicitly + args = ['-t', 'waveaudio', 'default', '-q', ...formatArgs]; + } else { + // Unix uses the default device flag + args = ['-d', '-q', ...formatArgs]; + } + + recordingProcess = spawn(cmd, args); + + 
recordingProcess.on('error', (error) => { + log('error', 'Failed to spawn sox', { error: error.message }); + process.send({ type: 'error', error: `Microphone capture failed (sox error): ${error.message}` }); + handleAudioError(); + }); + + recordingProcess.on('close', (code) => { + if (code !== 0 && code !== null && isRecording) { + log('warn', `sox exited with code ${code}`); + } + }); + + recordingProcess.stdout.on('data', (chunk) => { + if (pushStream && isRecording) { + try { + pushStream.write(chunk); + if (!_audioDataLogged) { + _audioDataLogged = true; + log('debug', 'First audio chunk received via sox', { size: chunk.length }); + } + } catch (err) { + log('error', 'Error writing audio data to push stream', { error: err.message }); + } + } + }); + + log('info', `Microphone capture started via sox (${isWindows ? 'waveaudio' : 'default device'})`); + } catch (error) { + log('error', 'Failed to start microphone capture', { error: error.message, stack: error.stack }); + process.send({ type: 'error', error: `Microphone capture failed: ${error.message}` }); + handleAudioError(); + } +} + +function handleAudioError() { + if (recordingProcess) { + try { recordingProcess.kill(); } catch (_) {} + recordingProcess = null; + } +} + +// ── Start recording ───────────────────────────────────────────────────── +function startRecording() { + try { + if (!speechConfig) { + process.send({ type: 'error', error: 'Azure Speech client not initialized' }); + return; + } + if (isRecording) { + log('warn', 'Recording already in progress'); + return; + } + + sessionStartTime = Date.now(); + retryCount = 0; + attemptRecording(); + } catch (error) { + log('error', 'Critical error in startRecording', { error: error.message, stack: error.stack }); + process.send({ type: 'error', error: `Speech recognition failed to start: ${error.message}` }); + isRecording = false; + } +} + +function attemptRecording() { + try { + isRecording = true; + process.send({ type: 'recording-started' }); + + 
cleanup(); + + try { + pushStream = sdk.AudioInputStream.createPushStream(); + audioConfig = sdk.AudioConfig.fromStreamInput(pushStream); + startMicrophoneCapture(); + } catch (audioError) { + log('error', 'Failed to create audio config', { error: audioError.message }); + process.send({ type: 'error', error: 'Audio configuration failed.' }); + isRecording = false; + return; + } + + // Create recognizer + try { + recognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig); + } catch (recErr) { + log('error', 'Failed to create speech recognizer', { error: recErr.message }); + process.send({ type: 'error', error: `Failed to create recognizer: ${recErr.message}` }); + isRecording = false; + cleanup(); + return; + } + + // ── Event handlers ────────────────────────────────────────────────── + recognizer.recognizing = (_s, e) => { + try { + if (e.result.reason === sdk.ResultReason.RecognizingSpeech) { + log('debug', 'Interim transcription', { text: e.result.text }); + process.send({ type: 'interim-transcription', text: e.result.text }); + } + } catch (err) { + log('error', 'Error in recognizing handler', { error: err.message }); + } + }; + + recognizer.recognized = (_s, e) => { + try { + if (e.result.reason === sdk.ResultReason.RecognizedSpeech) { + const dur = Date.now() - sessionStartTime; + if (e.result.text && e.result.text.trim().length > 0) { + log('info', 'Final transcription', { text: e.result.text, sessionDuration: `${dur}ms` }); + process.send({ type: 'transcription', text: e.result.text }); + } else { + log('debug', 'Empty transcription ignored'); + } + } else if (e.result.reason === sdk.ResultReason.NoMatch) { + log('debug', 'No speech pattern detected'); + } + } catch (err) { + log('error', 'Error in recognized handler', { error: err.message }); + } + }; + + recognizer.canceled = (_s, e) => { + log('warn', 'Recognition canceled', { + reason: e.reason, + errorCode: e.errorCode, + errorDetails: e.errorDetails + }); + + if (e.reason === 
sdk.CancellationReason.Error) { + let userMsg; + if (e.errorDetails && e.errorDetails.includes('1006')) { + userMsg = 'Network connection failed. Please check your internet connection.'; + } else if (e.errorDetails && e.errorDetails.includes('InvalidServiceCredentials')) { + userMsg = 'Invalid Azure Speech credentials. Please check AZURE_SPEECH_KEY and AZURE_SPEECH_REGION.'; + } else if (e.errorDetails && e.errorDetails.includes('Forbidden')) { + userMsg = 'Access denied. Please check your Azure Speech service subscription and region.'; + } else if (e.errorDetails && e.errorDetails.includes('AudioInputMicrophone_InitializationFailure')) { + userMsg = 'Microphone initialization failed. Please check microphone permissions and availability.'; + } else { + userMsg = `Recognition error: ${e.errorDetails}`; + } + process.send({ type: 'error', error: userMsg }); + + // Retry for transient errors + if (retryCount < maxRetries && e.errorDetails && + (e.errorDetails.includes('1006') || e.errorDetails.includes('timeout') || e.errorDetails.includes('network'))) { + retryCount++; + log('info', `Retrying recognition (attempt ${retryCount}/${maxRetries})`); + setTimeout(() => { + if (!isRecording) attemptRecording(); + }, 1000 * retryCount); + return; + } + + // Persistent credential / network errors → notify UI to stop gracefully + if (e.errorDetails && + (e.errorDetails.includes('InvalidServiceCredentials') || e.errorDetails.includes('Forbidden'))) { + process.send({ type: 'fatal-error', error: userMsg }); + } + } + stopRecording(); + }; + + recognizer.sessionStarted = (_s, e) => { + log('info', 'Recognition session started', { sessionId: e.sessionId }); + process.send({ type: 'session-started', sessionId: e.sessionId }); + }; + + recognizer.sessionStopped = (_s, e) => { + log('info', 'Recognition session ended', { sessionId: e.sessionId }); + process.send({ type: 'session-stopped', sessionId: e.sessionId }); + stopRecording(); + }; + + // ── Start continuous recognition 
──────────────────────────────────── + const startTimeout = setTimeout(() => { + log('error', 'Recognition start timeout'); + process.send({ type: 'error', error: 'Speech recognition start timeout. Please try again.' }); + stopRecording(); + }, 10000); + + recognizer.startContinuousRecognitionAsync( + () => { + clearTimeout(startTimeout); + log('info', 'Continuous speech recognition started successfully'); + }, + (error) => { + clearTimeout(startTimeout); + log('error', 'Failed to start continuous recognition', { error: error.toString(), retryCount }); + + if (retryCount < maxRetries) { + retryCount++; + log('info', `Retrying recognition start (attempt ${retryCount}/${maxRetries})`); + isRecording = false; + setTimeout(() => { attemptRecording(); }, 2000 * retryCount); + } else { + process.send({ type: 'error', error: `Recognition startup failed after ${maxRetries} attempts: ${error}` }); + isRecording = false; + } + } + ); + } catch (error) { + log('error', 'Failed to start recording session', { error: error.message, stack: error.stack }); + process.send({ type: 'error', error: `Recording startup failed: ${error.message}` }); + isRecording = false; + } +} + +// ── Stop recording ────────────────────────────────────────────────────── +function stopRecording() { + if (!isRecording) return; + + isRecording = false; + const dur = sessionStartTime ? 
Date.now() - sessionStartTime : 0; + log('info', 'Stopping speech recognition', { sessionDuration: `${dur}ms` }); + + if (recognizer) { + try { + recognizer.stopContinuousRecognitionAsync( + () => { + log('info', 'Speech recognition stopped successfully'); + process.send({ type: 'recording-stopped' }); + cleanup(); + }, + (error) => { + log('error', 'Error stopping recognition', { error: error.toString() }); + process.send({ type: 'recording-stopped' }); + cleanup(); + } + ); + } catch (error) { + log('error', 'Error stopping recognizer', { error: error.message }); + process.send({ type: 'recording-stopped' }); + cleanup(); + } + } else { + process.send({ type: 'recording-stopped' }); + cleanup(); + } +} + +// ── Status ────────────────────────────────────────────────────────────── +function getStatus() { + return { + isRecording, + isInitialized: !!speechConfig, + available, + sessionDuration: sessionStartTime ? Date.now() - sessionStartTime : 0, + retryCount + }; +} + +// ── Test connection ───────────────────────────────────────────────────── +function testConnection() { + if (!speechConfig) { + process.send({ type: 'test-result', success: false, message: 'Speech service not initialized' }); + return; + } + try { + // Simple validation — just creating a recognizer tests credential format + const testPush = sdk.AudioInputStream.createPushStream(); + const testAudio = sdk.AudioConfig.fromStreamInput(testPush); + const testRec = new sdk.SpeechRecognizer(speechConfig, testAudio); + testRec.close(); + try { testAudio.close(); } catch (_) {} + try { testPush.close(); } catch (_) {} + process.send({ type: 'test-result', success: true, message: 'Connection test successful' }); + } catch (error) { + process.send({ type: 'test-result', success: false, message: error.message }); + } +} + +// ── IPC message handler ───────────────────────────────────────────────── +process.on('message', (msg) => { + try { + switch (msg.type) { + case 'init': + initialize(msg.config); + 
break; + case 'start': + startRecording(); + break; + case 'stop': + stopRecording(); + break; + case 'test': + testConnection(); + break; + case 'status': + process.send({ type: 'status', status: getStatus() }); + break; + case 'shutdown': + stopRecording(); + cleanup(); + log('info', 'Worker shutting down'); + setTimeout(() => process.exit(0), 500); + break; + default: + log('warn', `Unknown message type: ${msg.type}`); + } + } catch (error) { + log('error', `Error handling message ${msg.type}`, { error: error.message, stack: error.stack }); + process.send({ type: 'error', error: `Worker error: ${error.message}` }); + } +}); + +// ── Graceful exit ─────────────────────────────────────────────────────── +process.on('SIGTERM', () => { + stopRecording(); + cleanup(); + process.exit(0); +}); + +process.on('uncaughtException', (error) => { + log('error', 'Uncaught exception in speech worker', { error: error.message, stack: error.stack }); + process.send({ type: 'error', error: `Worker crash: ${error.message}` }); + // Don't exit — let main process decide +}); + +process.on('unhandledRejection', (reason) => { + const msg = reason instanceof Error ? 
reason.message : String(reason); + log('error', 'Unhandled rejection in speech worker', { error: msg }); +}); + +log('info', 'Speech worker process started', { pid: process.pid }); diff --git a/src/core/config.js b/src/core/config.js index 12dfff1d..ed02ab0c 100644 --- a/src/core/config.js +++ b/src/core/config.js @@ -39,17 +39,16 @@ class ConfigManager { }, llm: { - gemini: { - model: 'gemini-2.5-flash', - maxRetries: 3, + groq: { + model: 'llama-3.1-8b-instant', + visionModel: 'llama-3.2-11b-vision-preview', + maxRetries: 5, timeout: 60000, fallbackEnabled: true, - enableFallbackMethod: true, generation: { temperature: 0.7, - topK: 32, - topP: 0.9, - maxOutputTokens: 4096 + max_tokens: 150, + top_p: 0.95 } } }, diff --git a/src/managers/session.manager.js b/src/managers/session.manager.js index 9c9d971e..198bc044 100644 --- a/src/managers/session.manager.js +++ b/src/managers/session.manager.js @@ -596,6 +596,24 @@ class SessionManager { utilizationPercent: Math.round((this.sessionMemory.length / this.maxSize) * 100) }; } + + setDocumentContext(text) { + this.documentContext = text; + logger.info('Document context set', { length: text?.length || 0 }); + + if (text) { + this.addConversationEvent({ + role: 'system', + content: `User uploaded a reference document (${text.length} characters).`, + action: 'document_upload', + metadata: { documentLength: text.length } + }); + } + } + + getDocumentContext() { + return this.documentContext || null; + } } module.exports = new SessionManager(); \ No newline at end of file diff --git a/src/services/llm.service.js b/src/services/llm.service.js index 0384691d..b87c0ff6 100644 --- a/src/services/llm.service.js +++ b/src/services/llm.service.js @@ -1,4 +1,4 @@ -const { GoogleGenerativeAI } = require('@google/generative-ai'); +const Groq = require('groq-sdk'); const logger = require('../core/logger').createServiceLogger('LLM'); const config = require('../core/config'); const { promptLoader } = require('../../prompt-loader'); @@ 
-6,7 +6,6 @@ const { promptLoader } = require('../../prompt-loader'); class LLMService { constructor() { this.client = null; - this.model = null; this.isInitialized = false; this.requestCount = 0; this.errorCount = 0; @@ -15,44 +14,35 @@ class LLMService { } initializeClient() { - const apiKey = config.getApiKey('GEMINI'); + const apiKey = config.getApiKey('GROQ'); if (!apiKey || apiKey === 'your-api-key-here') { - logger.warn('Gemini API key not configured', { - keyExists: !!apiKey, - isPlaceholder: apiKey === 'your-api-key-here' + logger.warn('Groq API key not configured', { + keyExists: !!apiKey }); return; } try { - this.client = new GoogleGenerativeAI(apiKey); - - // Use the correct model name for v1 API - const modelName = config.get('llm.gemini.model'); - this.model = this.client.getGenerativeModel({ - model: modelName, - generationConfig: this.getGenerationConfig() - }); + this.client = new Groq({ apiKey }); this.isInitialized = true; - logger.info('Gemini AI client initialized successfully', { - model: modelName + logger.info('Groq AI client initialized successfully', { + model: config.get('llm.groq.model') }); } catch (error) { - logger.error('Failed to initialize Gemini client', { + logger.error('Failed to initialize Groq client', { error: error.message }); } } getGenerationConfig(overrides = {}) { - const defaults = config.get('llm.gemini.generation') || {}; + const defaults = config.get('llm.groq.generation') || {}; const fallback = { temperature: 0.7, - topK: 40, - topP: 0.95, - maxOutputTokens: 4096 + max_tokens: 4096, + top_p: 0.95 }; const merged = { ...fallback, ...defaults, ...overrides }; @@ -61,63 +51,12 @@ class LLMService { ); } - applyGenerationDefaults(request, overrides = {}) { - request.generationConfig = this.getGenerationConfig({ ...(request.generationConfig || {}), ...overrides }); - return request; - } - - extractTextFromCandidates(response) { - const candidates = Array.isArray(response?.candidates) - ? 
response.candidates - : Array.isArray(response) - ? response - : []; - - if (!candidates.length) { - throw new Error('No candidates in Gemini response'); - } - - const candidateWithText = candidates.find(candidate => { - const parts = candidate?.content?.parts; - return Array.isArray(parts) && parts.some(part => typeof part.text === 'string' && part.text.trim().length > 0); - }); - - if (!candidateWithText) { - const finishReasons = candidates.map(c => c.finishReason || 'unknown').join(', '); - throw new Error(`No text parts in candidates. Finish reasons: ${finishReasons}`); - } - - const textParts = candidateWithText.content.parts - .filter(part => typeof part.text === 'string' && part.text.trim().length > 0) - .map(part => part.text.trim()); - - if (!textParts.length) { - throw new Error(`Candidate parts missing text after filtering: ${JSON.stringify(candidateWithText)}`); - } - - const text = textParts.join('\n'); - - return { - text, - candidate: candidateWithText, - finishReason: candidateWithText.finishReason || null - }; - } - /** - * Process an image directly with Gemini using the active skill prompt. - * The image buffer is sent as inlineData alongside a concise instruction. - * For image-based queries, we include the skill prompt (e.g., DSA) as systemInstruction. - * @param {Buffer} imageBuffer - PNG/JPEG image bytes - * @param {string} mimeType - e.g., 'image/png' or 'image/jpeg' - * @param {string} activeSkill - current skill (e.g. 'dsa') - * @param {Array} sessionMemory - optional (not required for image) - * @param {string|null} programmingLanguage - optional language context for skills that need it - * @returns {Promise<{response: string, metadata: object}>} + * Process an image directly with Groq (using vision model) */ async processImageWithSkill(imageBuffer, mimeType, activeSkill, sessionMemory = [], programmingLanguage = null) { if (!this.isInitialized) { - throw new Error('LLM service not initialized. 
Check Gemini API key configuration.'); + throw new Error('LLM service not initialized. Check Groq API key configuration.'); } if (!imageBuffer || !Buffer.isBuffer(imageBuffer)) { @@ -128,58 +67,28 @@ class LLMService { this.requestCount++; try { - // Build system instruction using the skill prompt (with optional language injection) const { promptLoader } = require('../../prompt-loader'); const skillPrompt = promptLoader.getSkillPrompt(activeSkill, programmingLanguage) || ''; + + const base64Image = imageBuffer.toString('base64'); + const imageUrl = `data:${mimeType};base64,${base64Image}`; - // Build request with text + image parts - const base64 = imageBuffer.toString('base64'); - - const request = { - contents: [ - { - role: 'user', - parts: [ - { text: this.formatImageInstruction(activeSkill, programmingLanguage) }, - { inlineData: { data: base64, mimeType } } - ] - } - ] - }; - - this.applyGenerationDefaults(request); + const messages = []; if (skillPrompt && skillPrompt.trim().length > 0) { - request.systemInstruction = { parts: [{ text: skillPrompt }] }; + messages.push({ role: 'system', content: skillPrompt }); } - // Execute with retries/timeout - try alternative method first for network reliability - let responseText; - const preferAlternative = !!config.get('llm.gemini.enableFallbackMethod'); - try { - if (preferAlternative) { - logger.debug('Attempting alternative HTTPS method first for reliability'); - responseText = await this.executeAlternativeRequest(request); - } else { - responseText = await this.executeRequest(request); - } - } catch (error) { - const secondaryLabel = preferAlternative ? 'primary SDK method' : 'alternative HTTPS method'; - logger.warn(`${preferAlternative ? 'Alternative' : 'Primary'} method failed, trying ${secondaryLabel}`, { error: error.message }); - const secondaryFn = preferAlternative ? 
this.executeRequest.bind(this) : this.executeAlternativeRequest.bind(this); - - try { - responseText = await secondaryFn(request); - } catch (secondaryError) { - logger.error('Both Gemini request methods failed', { - firstError: error.message, - secondError: secondaryError.message - }); - throw secondaryError; - } - } + messages.push({ + role: 'user', + content: [ + { type: 'text', text: this.formatImageInstruction(activeSkill, programmingLanguage) }, + { type: 'image_url', image_url: { url: imageUrl } } + ] + }); + + const responseText = await this.executeRequest(messages, true); - // Enforce language in code fences if provided const finalResponse = programmingLanguage ? this.enforceProgrammingLanguage(responseText, programmingLanguage) : responseText; @@ -212,7 +121,7 @@ class LLMService { requestId: this.requestCount }); - if (config.get('llm.gemini.fallbackEnabled')) { + if (config.get('llm.groq.fallbackEnabled')) { return this.generateFallbackResponse('[image]', activeSkill); } throw error; @@ -226,7 +135,7 @@ class LLMService { async processTextWithSkill(text, activeSkill, sessionMemory = [], programmingLanguage = null) { if (!this.isInitialized) { - throw new Error('LLM service not initialized. Check Gemini API key configuration.'); + throw new Error('LLM service not initialized. 
Check Groq API key configuration.'); } const startTime = Date.now(); @@ -237,50 +146,20 @@ class LLMService { activeSkill, textLength: text.length, hasSessionMemory: sessionMemory.length > 0, - programmingLanguage: programmingLanguage || 'not specified', requestId: this.requestCount }); - const geminiRequest = this.buildGeminiRequest(text, activeSkill, sessionMemory, programmingLanguage); - - const preferAlternative = !!config.get('llm.gemini.enableFallbackMethod'); - let response; - try { - if (preferAlternative) { - logger.debug('Attempting alternative HTTPS method first for text processing'); - response = await this.executeAlternativeRequest(geminiRequest); - } else { - response = await this.executeRequest(geminiRequest); - } - } catch (error) { - const secondaryLabel = preferAlternative ? 'primary SDK method' : 'alternative HTTPS method'; - logger.warn(`${preferAlternative ? 'Alternative' : 'Primary'} method failed, trying ${secondaryLabel}`, { - error: error.message, - requestId: this.requestCount - }); - const secondaryFn = preferAlternative ? this.executeRequest.bind(this) : this.executeAlternativeRequest.bind(this); - try { - response = await secondaryFn(geminiRequest); - } catch (secondaryError) { - logger.error('Both Gemini request methods failed for text processing', { - firstError: error.message, - secondError: secondaryError.message, - requestId: this.requestCount - }); - throw secondaryError; - } - } + const messages = this.buildGroqRequest(text, activeSkill, sessionMemory, programmingLanguage); + const responseText = await this.executeRequest(messages); - // Enforce language in code fences if programmingLanguage specified const finalResponse = programmingLanguage - ? this.enforceProgrammingLanguage(response, programmingLanguage) - : response; + ? 
this.enforceProgrammingLanguage(responseText, programmingLanguage) + : responseText; logger.logPerformance('LLM text processing', startTime, { activeSkill, textLength: text.length, responseLength: finalResponse.length, - programmingLanguage: programmingLanguage || 'not specified', requestId: this.requestCount }); @@ -299,11 +178,10 @@ class LLMService { logger.error('LLM processing failed', { error: error.message, activeSkill, - programmingLanguage: programmingLanguage || 'not specified', requestId: this.requestCount }); - if (config.get('llm.gemini.fallbackEnabled')) { + if (config.get('llm.groq.fallbackEnabled')) { return this.generateFallbackResponse(text, activeSkill); } @@ -313,7 +191,7 @@ class LLMService { async processTranscriptionWithIntelligentResponse(text, activeSkill, sessionMemory = [], programmingLanguage = null) { if (!this.isInitialized) { - throw new Error('LLM service not initialized. Check Gemini API key configuration.'); + throw new Error('LLM service not initialized. Check Groq API key configuration.'); } const startTime = Date.now(); @@ -323,53 +201,15 @@ class LLMService { logger.info('Processing transcription with intelligent response', { activeSkill, textLength: text.length, - hasSessionMemory: sessionMemory.length > 0, - programmingLanguage: programmingLanguage || 'not specified', requestId: this.requestCount }); - const geminiRequest = this.buildIntelligentTranscriptionRequest(text, activeSkill, sessionMemory, programmingLanguage); - - const preferAlternative = !!config.get('llm.gemini.enableFallbackMethod'); - let response; - try { - if (preferAlternative) { - logger.debug('Attempting alternative HTTPS method first for transcription processing'); - response = await this.executeAlternativeRequest(geminiRequest); - } else { - response = await this.executeRequest(geminiRequest); - } - } catch (error) { - const secondaryLabel = preferAlternative ? 'primary SDK method' : 'alternative HTTPS method'; - logger.warn(`${preferAlternative ? 
'Alternative' : 'Primary'} method failed, trying ${secondaryLabel}`, { - error: error.message, - requestId: this.requestCount - }); - const secondaryFn = preferAlternative ? this.executeRequest.bind(this) : this.executeAlternativeRequest.bind(this); - try { - response = await secondaryFn(geminiRequest); - } catch (secondaryError) { - logger.error('Both Gemini request methods failed for transcription processing', { - firstError: error.message, - secondError: secondaryError.message, - requestId: this.requestCount - }); - throw secondaryError; - } - } + const messages = this.buildIntelligentTranscriptionRequest(text, activeSkill, sessionMemory, programmingLanguage); + const responseText = await this.executeRequest(messages); - // Enforce language in code fences if programmingLanguage specified const finalResponse = programmingLanguage - ? this.enforceProgrammingLanguage(response, programmingLanguage) - : response; - - logger.logPerformance('LLM transcription processing', startTime, { - activeSkill, - textLength: text.length, - responseLength: finalResponse.length, - programmingLanguage: programmingLanguage || 'not specified', - requestId: this.requestCount - }); + ? this.enforceProgrammingLanguage(responseText, programmingLanguage) + : responseText; return { response: finalResponse, @@ -387,11 +227,10 @@ class LLMService { logger.error('LLM transcription processing failed', { error: error.message, activeSkill, - programmingLanguage: programmingLanguage || 'not specified', requestId: this.requestCount }); - if (config.get('llm.gemini.fallbackEnabled')) { + if (config.get('llm.groq.fallbackEnabled')) { return this.generateIntelligentFallbackResponse(text, activeSkill); } @@ -399,10 +238,6 @@ class LLMService { } } - /** - * Normalize all triple-backtick code fences to the selected programming language tag. - * Does not alter the inner code; only ensures fence language tags are correct. 
- */ enforceProgrammingLanguage(text, programmingLanguage) { try { if (!text || !programmingLanguage) return text; @@ -410,34 +245,28 @@ class LLMService { const fenceTagMap = { cpp: 'cpp', c: 'c', python: 'python', java: 'java', javascript: 'javascript', js: 'javascript' }; const fenceTag = fenceTagMap[norm] || norm || 'text'; - // Replace all triple-backtick fences' language token with the selected tag const replacedBackticks = text.replace(/```([^\n]*)\n/g, (match, info) => { const current = (info || '').trim(); - // If already the desired fenceTag as the first token, keep as is if (current.split(/\s+/)[0].toLowerCase() === fenceTag) return match; return '```' + fenceTag + '\n'; }); - // Optionally normalize tildes fences to backticks with correct tag const normalizedTildes = replacedBackticks.replace(/~~~([^\n]*)\n/g, () => '```' + fenceTag + '\n'); - return normalizedTildes; } catch (_) { return text; } } - buildGeminiRequest(text, activeSkill, sessionMemory, programmingLanguage) { - // Check if we have the new conversation history format + buildGroqRequest(text, activeSkill, sessionMemory, programmingLanguage) { const sessionManager = require('../managers/session.manager'); if (sessionManager && typeof sessionManager.getConversationHistory === 'function') { - const conversationHistory = sessionManager.getConversationHistory(15); + const conversationHistory = sessionManager.getConversationHistory(4); const skillContext = sessionManager.getSkillContext(activeSkill, programmingLanguage); - return this.buildGeminiRequestWithHistory(text, activeSkill, conversationHistory, skillContext, programmingLanguage); + return this.buildGroqRequestWithHistory(text, activeSkill, conversationHistory, skillContext, programmingLanguage); } - // Fallback to old method for compatibility - now with programming language support const requestComponents = promptLoader.getRequestComponents( activeSkill, text, @@ -445,265 +274,132 @@ class LLMService { programmingLanguage ); - const 
request = { - contents: [] - }; - - this.applyGenerationDefaults(request); + const messages = []; - // Use the skill prompt that already has programming language injected if (requestComponents.shouldUseModelMemory && requestComponents.skillPrompt) { - request.systemInstruction = { - parts: [{ text: requestComponents.skillPrompt }] - }; - - logger.debug('Using language-enhanced system instruction for skill', { - skill: activeSkill, - programmingLanguage: programmingLanguage || 'not specified', - promptLength: requestComponents.skillPrompt.length, - requiresProgrammingLanguage: requestComponents.requiresProgrammingLanguage - }); + messages.push({ role: 'system', content: requestComponents.skillPrompt }); } - request.contents.push({ - role: 'user', - parts: [{ text: this.formatUserMessage(text, activeSkill) }] - }); - - return request; + messages.push({ role: 'user', content: this.formatUserMessage(text, activeSkill) }); + return messages; } - buildGeminiRequestWithHistory(text, activeSkill, conversationHistory, skillContext, programmingLanguage) { - const request = { - contents: [] - }; + buildGroqRequestWithHistory(text, activeSkill, conversationHistory, skillContext, programmingLanguage) { + const messages = []; + const sessionManager = require('../managers/session.manager'); + const documentContext = sessionManager.getDocumentContext(); - this.applyGenerationDefaults(request); + if (documentContext) { + messages.push({ role: 'system', content: `## Reference Document Context\n${documentContext}\n\n## FIRST-PERSON RULE\nYou must adopt a first-person persona based on the reference document context provided above. When answering questions, speak directly from the perspective of the document's subject or author. Use "I", "me", "my". Keep your answers extremely concise (not too long, not too short). 
Do not break character.` }); + } - // Use the skill prompt from context (which may already include programming language) if (skillContext.skillPrompt) { - request.systemInstruction = { - parts: [{ text: skillContext.skillPrompt }] - }; - - logger.debug('Using skill context prompt as system instruction', { - skill: activeSkill, - programmingLanguage: programmingLanguage || 'not specified', - promptLength: skillContext.skillPrompt.length, - requiresProgrammingLanguage: skillContext.requiresProgrammingLanguage || false, - hasLanguageInjection: programmingLanguage && skillContext.requiresProgrammingLanguage - }); + messages.push({ role: 'system', content: skillContext.skillPrompt }); } - // Add conversation history (excluding system messages) with validation const conversationContents = conversationHistory - .filter(event => { - return event.role !== 'system' && - event.content && - typeof event.content === 'string' && - event.content.trim().length > 0; - }) - .map(event => { - const content = event.content.trim(); - return { - role: event.role === 'model' ? 'model' : 'user', - parts: [{ text: content }] - }; - }); + .filter(event => event.role !== 'system' && event.content && typeof event.content === 'string' && event.content.trim().length > 0) + .map(event => ({ + role: event.role === 'model' ? 
'assistant' : 'user', + content: event.content.trim() + })); - // Add the conversation history - request.contents.push(...conversationContents); + messages.push(...conversationContents); - // Format and validate the current user input const formattedMessage = this.formatUserMessage(text, activeSkill); if (!formattedMessage || formattedMessage.trim().length === 0) { throw new Error('Failed to format user message or message is empty'); } - // Add the current user input - request.contents.push({ - role: 'user', - parts: [{ text: formattedMessage }] - }); - - logger.debug('Built Gemini request with conversation history', { - skill: activeSkill, - programmingLanguage: programmingLanguage || 'not specified', - historyLength: conversationHistory.length, - totalContents: request.contents.length, - hasSystemInstruction: !!request.systemInstruction, - requiresProgrammingLanguage: skillContext.requiresProgrammingLanguage || false - }); - - return request; + messages.push({ role: 'user', content: formattedMessage }); + return messages; } buildIntelligentTranscriptionRequest(text, activeSkill, sessionMemory, programmingLanguage) { - // Validate input text first const cleanText = text && typeof text === 'string' ? 
text.trim() : ''; if (!cleanText) { - throw new Error('Empty or invalid transcription text provided to buildIntelligentTranscriptionRequest'); + throw new Error('Empty or invalid transcription text provided'); } - // Check if we have the new conversation history format const sessionManager = require('../managers/session.manager'); if (sessionManager && typeof sessionManager.getConversationHistory === 'function') { - const conversationHistory = sessionManager.getConversationHistory(10); + const conversationHistory = sessionManager.getConversationHistory(4); const skillContext = sessionManager.getSkillContext(activeSkill, programmingLanguage); return this.buildIntelligentTranscriptionRequestWithHistory(cleanText, activeSkill, conversationHistory, skillContext, programmingLanguage); } - // Fallback to basic intelligent request - const request = { - contents: [] - }; - - this.applyGenerationDefaults(request); - - // Add intelligent filtering system instruction - const intelligentPrompt = this.getIntelligentTranscriptionPrompt(activeSkill, programmingLanguage); - if (!intelligentPrompt) { - throw new Error('Failed to generate intelligent transcription prompt'); + const messages = []; + const documentContext = sessionManager ? 
sessionManager.getDocumentContext() : null; + const intelligentPrompt = this.getIntelligentTranscriptionPrompt(activeSkill, programmingLanguage, documentContext); + + if (intelligentPrompt) { + messages.push({ role: 'system', content: intelligentPrompt }); } - request.systemInstruction = { - parts: [{ text: intelligentPrompt }] - }; - - request.contents.push({ - role: 'user', - parts: [{ text: cleanText }] - }); - - logger.debug('Built basic intelligent transcription request', { - skill: activeSkill, - programmingLanguage: programmingLanguage || 'not specified', - textLength: cleanText.length, - hasSystemInstruction: !!request.systemInstruction - }); - - return request; + messages.push({ role: 'user', content: cleanText }); + return messages; } buildIntelligentTranscriptionRequestWithHistory(text, activeSkill, conversationHistory, skillContext, programmingLanguage) { - const request = { - contents: [] - }; - - this.applyGenerationDefaults(request); - - // For chat/transcription messages, DO NOT include the full skill prompt; use only the intelligent filter prompt - const intelligentPrompt = this.getIntelligentTranscriptionPrompt(activeSkill, programmingLanguage); - request.systemInstruction = { parts: [{ text: intelligentPrompt }] }; + const messages = []; + const sessionManager = require('../managers/session.manager'); + const documentContext = sessionManager.getDocumentContext(); + const intelligentPrompt = this.getIntelligentTranscriptionPrompt(activeSkill, programmingLanguage, documentContext); + + if (intelligentPrompt) { + messages.push({ role: 'system', content: intelligentPrompt }); + } - // Add recent conversation history (excluding system messages) with validation const conversationContents = conversationHistory - .filter(event => { - // Filter out system messages and ensure content exists and is valid - return event.role !== 'system' && - event.content && - typeof event.content === 'string' && - event.content.trim().length > 0; - }) - .slice(-8) // Keep 
last 8 exchanges for context - .map(event => { - const content = event.content.trim(); - if (!content) { - logger.warn('Empty content found in conversation history', { event }); - return null; - } - return { - role: event.role === 'model' ? 'model' : 'user', - parts: [{ text: content }] - }; - }) - .filter(content => content !== null); // Remove any null entries + .filter(event => event.role !== 'system' && event.content && typeof event.content === 'string' && event.content.trim().length > 0) + .slice(-4) + .map(event => ({ + role: event.role === 'model' ? 'assistant' : 'user', + content: event.content.trim() + })); - // Add the conversation history - request.contents.push(...conversationContents); + messages.push(...conversationContents); - // Validate and add the current transcription const cleanText = text && typeof text === 'string' ? text.trim() : ''; if (!cleanText) { throw new Error('Empty or invalid transcription text provided'); } - request.contents.push({ - role: 'user', - parts: [{ text: cleanText }] - }); - - // Ensure we have at least one content item - if (request.contents.length === 0) { - throw new Error('No valid content to send to Gemini API'); - } - - logger.debug('Built intelligent transcription request with conversation history', { - skill: activeSkill, - programmingLanguage: programmingLanguage || 'not specified', - historyLength: conversationHistory.length, - totalContents: request.contents.length, - hasSkillPrompt: !!skillContext.skillPrompt, - cleanTextLength: cleanText.length, - requiresProgrammingLanguage: skillContext.requiresProgrammingLanguage || false - }); - - return request; + messages.push({ role: 'user', content: cleanText }); + return messages; } - getIntelligentTranscriptionPrompt(activeSkill, programmingLanguage) { + getIntelligentTranscriptionPrompt(activeSkill, programmingLanguage, documentContext = null) { let prompt = `# Intelligent Transcription Response System -Assume you are asked a question in 
${activeSkill.toUpperCase()} mode. Your job is to intelligently respond to question/message with appropriate brevity. -Assume you are in an interview and you need to perform best in ${activeSkill.toUpperCase()} mode. -Always respond to the point, do not repeat the question or unnecessary information which is not related to ${activeSkill}.`; +You are acting as an AI assistant for an interviewee during a live interview. +The transcription you receive will contain BOTH the interviewer's questions AND the interviewee's (my) voice. +CRITICAL INSTRUCTION: You must ONLY respond to the interviewer's questions. If the transcription contains the interviewee (me) answering a question or making a statement, IGNORE IT and do not reply. Do not try to answer my own answers! + +## Brevity & Speed Rule +You MUST keep your answers extremely concise. Respond with exactly 1 to 3 short sentences. DO NOT provide long explanations, lists, or pleasantries unless specifically asked. Short answers ensure the response is generated instantly, which is crucial for a live interview.`; + + if (documentContext) { + prompt += `\n\n## Reference Document Context\n${documentContext}\n\n## FIRST-PERSON RULE\nYou must adopt a first-person persona based on the reference document context provided above. Speak directly from the perspective of the document's subject or author. Use "I", "me", "my". Do not break character.`; + } - // Add programming language context if provided if (programmingLanguage) { const lang = String(programmingLanguage).toLowerCase(); const languageMap = { cpp: 'C++', c: 'C', python: 'Python', java: 'Java', javascript: 'JavaScript', js: 'JavaScript' }; const fenceTagMap = { cpp: 'cpp', c: 'c', python: 'python', java: 'java', javascript: 'javascript', js: 'javascript' }; const languageTitle = languageMap[lang] || (lang.charAt(0).toUpperCase() + lang.slice(1)); const fenceTag = fenceTagMap[lang] || lang || 'text'; - prompt += `\n\nCODING CONTEXT: Respond ONLY in ${languageTitle}. 
All code blocks must use triple backticks with language tag \`\`\`${fenceTag}\`\`\`. Do not include other languages unless explicitly asked.`; + prompt += `\n\nCODING CONTEXT: If writing code, respond ONLY in ${languageTitle}. All code blocks must use triple backticks with language tag \`\`\`${fenceTag}\`\`\`.`; } prompt += ` -## Response Rules: - -### If the transcription is casual conversation, greetings, or NOT related to ${activeSkill}: -- Respond with: "Yeah, I'm listening. Ask your question relevant to ${activeSkill}." -- Or similar brief acknowledgments like: "I'm here, what's your ${activeSkill} question?" - -### If the transcription IS relevant to ${activeSkill} or is a follow-up question: -- Provide a comprehensive, detailed response -- Use bullet points, examples, and explanations -- Focus on actionable insights and complete answers -- Do not truncate or shorten your response - -### Examples of casual/irrelevant messages: -- "Hello", "Hi there", "How are you?" -- "What's the weather like?" -- "I'm just testing this" -- Random conversations not related to ${activeSkill} - -### Examples of relevant messages: -- Actual questions about ${activeSkill} concepts -- Follow-up questions to previous responses -- Requests for clarification on ${activeSkill} topics -- Problem-solving requests related to ${activeSkill} - -## Response Format: -- Keep responses detailed -- Use bullet points for structured answers -- Be encouraging and helpful -- Stay focused on ${activeSkill} - -If the user's input is a coding or DSA problem statement and contains no code, produce a complete, runnable solution in the selected programming language without asking for more details. Always include the final implementation in a properly tagged code block. - -Remember: Be intelligent about filtering - only provide detailed responses when the user actually needs help with ${activeSkill}.`; +## Final Response Rules: +1. Always be conversational and direct. +2. 
NEVER provide long, detailed responses. Keep it to 1-3 short sentences. +3. If the user asks a coding question, provide a very concise explanation or a brief snippet, but do not write an essay. +4. Remember: DO NOT answer statements made by the interviewee (me). Only answer the interviewer's questions.`; return prompt; } @@ -712,196 +408,93 @@ Remember: Be intelligent about filtering - only provide detailed responses when return `Context: ${activeSkill.toUpperCase()} analysis request\n\nText to analyze:\n${text}`; } - async executeRequest(geminiRequest) { - const maxRetries = config.get('llm.gemini.maxRetries'); - const timeout = config.get('llm.gemini.timeout'); - - // Add request debugging - logger.debug('Executing Gemini request', { - hasModel: !!this.model, - hasClient: !!this.client, - requestKeys: Object.keys(geminiRequest), - timeout, - maxRetries, - nodeVersion: process.version, - platform: process.platform - }); - - for (let attempt = 1; attempt <= maxRetries; attempt++) { - try { - // Pre-flight check - await this.performPreflightCheck(); - - const timeoutPromise = new Promise((_, reject) => - setTimeout(() => reject(new Error('Request timeout')), timeout) - ); - - logger.debug(`Gemini API attempt ${attempt} starting`, { - timestamp: new Date().toISOString(), - timeout - }); - - const requestPromise = this.model.generateContent(geminiRequest); - const result = await Promise.race([requestPromise, timeoutPromise]); - - if (!result.response) { - throw new Error('Empty response from Gemini API'); - } + async executeRequest(messages, isVision = false) { + // Fast model rotation pool — each model has independent rate limits on Groq free tier + const modelPool = isVision + ? 
['llama-3.2-11b-vision-preview'] + : ['llama-3.1-8b-instant', 'llama3-8b-8192', 'gemma2-9b-it', 'llama-3.3-70b-versatile']; - const { text, finishReason } = this.extractTextFromCandidates(result.response); + const payload = { + messages, + model: modelPool[0], + ...this.getGenerationConfig() + }; - if (finishReason === 'MAX_TOKENS') { - logger.warn('Gemini primary response reached max tokens limit', { - attempt, - finishReason - }); + // Try each model instantly on rate limit — zero delay rotation + for (let i = 0; i < modelPool.length; i++) { + payload.model = modelPool[i]; + try { + const response = await this.client.chat.completions.create(payload); + + if (!response.choices || response.choices.length === 0) { + throw new Error('Empty response from Groq API'); } - logger.debug('Gemini API request successful', { - attempt, - responseLength: text.length, - finishReason - }); - - return text; + return response.choices[0].message.content; } catch (error) { const errorInfo = this.analyzeError(error); - // Enhanced error logging for fetch failures - if (errorInfo.type === 'NETWORK_ERROR') { - logger.error('Network error details', { - attempt, - errorMessage: error.message, - errorStack: error.stack, - errorName: error.name, - nodeEnv: process.env.NODE_ENV, - electronVersion: process.versions.electron, - chromeVersion: process.versions.chrome, - nodeVersion: process.versions.node, - userAgent: this.getUserAgent() - }); - } - - logger.warn(`Gemini API attempt ${attempt} failed`, { + logger.warn(`Groq model ${payload.model} failed`, { error: error.message, errorType: errorInfo.type, - isNetworkError: errorInfo.isNetworkError, - suggestedAction: errorInfo.suggestedAction, - remainingAttempts: maxRetries - attempt + model: payload.model, + remainingModels: modelPool.length - i - 1 }); - if (attempt === maxRetries) { - const finalError = new Error(`Gemini API failed after ${maxRetries} attempts: ${error.message}`); - finalError.errorAnalysis = errorInfo; - 
finalError.originalError = error; - throw finalError; + // If rate limited, immediately try next model (no delay!) + if (errorInfo.type === 'RATE_LIMIT_ERROR' && i < modelPool.length - 1) { + logger.info(`Rate limited on ${payload.model}, instantly switching to ${modelPool[i + 1]}`); + continue; // no delay, just try next model } - // Use exponential backoff with jitter for network errors - const baseDelay = errorInfo.isNetworkError ? 2500 : 1500; - const delay = baseDelay * attempt + Math.random() * 1000; - - logger.debug(`Waiting ${delay}ms before retry ${attempt + 1}`, { - baseDelay, - isNetworkError: errorInfo.isNetworkError - }); - - await this.delay(delay); + // For non-rate-limit errors, or if we've exhausted all models, throw + if (i === modelPool.length - 1) { + throw new Error(`All Groq models exhausted: ${error.message}`); + } + + // Small delay only for non-rate-limit errors (network issues etc.) + if (errorInfo.type !== 'RATE_LIMIT_ERROR') { + const delay = 1000 + Math.random() * 500; + await this.delay(delay); + } } } } async performPreflightCheck() { - // Quick connectivity check try { - const startTime = Date.now(); await this.testNetworkConnection({ - host: 'generativelanguage.googleapis.com', + host: 'api.groq.com', port: 443, - name: 'Gemini API Endpoint' + name: 'Groq API Endpoint' }); - const latency = Date.now() - startTime; - - logger.debug('Preflight check passed', { latency }); } catch (error) { - logger.warn('Preflight check failed', { - error: error.message, - suggestion: 'Network connectivity issue detected before API call' - }); - // Don't throw here - let the actual API call fail with more detail - } - } - - getUserAgent() { - try { - // Try to get user agent from Electron if available - if (typeof navigator !== 'undefined' && navigator.userAgent) { - return navigator.userAgent; - } - return `Node.js/${process.version} (${process.platform}; ${process.arch})`; - } catch { - return 'Unknown'; + logger.warn('Preflight check failed', { 
error: error.message }); } } analyzeError(error) { const errorMessage = error.message.toLowerCase(); - // Network connectivity errors - if (errorMessage.includes('fetch failed') || - errorMessage.includes('network error') || - errorMessage.includes('enotfound') || - errorMessage.includes('econnrefused') || - errorMessage.includes('timeout')) { - return { - type: 'NETWORK_ERROR', - isNetworkError: true, - suggestedAction: 'Check internet connection and firewall settings' - }; + if (errorMessage.includes('fetch failed') || errorMessage.includes('network error') || errorMessage.includes('timeout')) { + return { type: 'NETWORK_ERROR', isNetworkError: true }; } - // API key errors - if (errorMessage.includes('unauthorized') || - errorMessage.includes('invalid api key') || - errorMessage.includes('forbidden')) { - return { - type: 'AUTH_ERROR', - isNetworkError: false, - suggestedAction: 'Verify Gemini API key configuration' - }; + if (errorMessage.includes('unauthorized') || errorMessage.includes('invalid api key')) { + return { type: 'AUTH_ERROR', isNetworkError: false }; } - // Rate limiting - if (errorMessage.includes('quota') || - errorMessage.includes('rate limit') || - errorMessage.includes('too many requests')) { - return { - type: 'RATE_LIMIT_ERROR', - isNetworkError: false, - suggestedAction: 'Wait before retrying or check API quota' - }; + if (errorMessage.includes('rate limit') || errorMessage.includes('too many requests')) { + return { type: 'RATE_LIMIT_ERROR', isNetworkError: false }; } - // Timeout errors - if (errorMessage.includes('request timeout') || errorMessage.includes('etimedout')) { - return { - type: 'TIMEOUT_ERROR', - isNetworkError: true, - suggestedAction: 'Check network latency or increase timeout' - }; - } - - return { - type: 'UNKNOWN_ERROR', - isNetworkError: false, - suggestedAction: 'Check logs for more details' - }; + return { type: 'UNKNOWN_ERROR', isNetworkError: false }; } async checkNetworkConnectivity() { const connectivityTests = 
[ { host: 'google.com', port: 443, name: 'Google (HTTPS)' }, - { host: 'generativelanguage.googleapis.com', port: 443, name: 'Gemini API Endpoint' } + { host: 'api.groq.com', port: 443, name: 'Groq API Endpoint' } ]; const results = await Promise.allSettled( @@ -917,7 +510,6 @@ Remember: Be intelligent about filtering - only provide detailed responses when })) }; - logger.info('Network connectivity check completed', connectivity); return connectivity; } @@ -947,13 +539,11 @@ Remember: Be intelligent about filtering - only provide detailed responses when } generateFallbackResponse(text, activeSkill) { - logger.info('Generating fallback response', { activeSkill }); - const fallbackResponses = { 'dsa': 'This appears to be a data structures and algorithms problem. Consider breaking it down into smaller components and identifying the appropriate algorithm or data structure to use.', 'system-design': 'For this system design question, consider scalability, reliability, and the trade-offs between different architectural approaches.', 'programming': 'This looks like a programming challenge. Focus on understanding the requirements, edge cases, and optimal time/space complexity.', - 'default': 'I can help analyze this content. Please ensure your Gemini API key is properly configured for detailed analysis.' + 'default': 'I can help analyze this content. Please ensure your Groq API key is properly configured for detailed analysis.' 
}; const response = fallbackResponses[activeSkill] || fallbackResponses.default; @@ -970,36 +560,7 @@ Remember: Be intelligent about filtering - only provide detailed responses when } generateIntelligentFallbackResponse(text, activeSkill) { - logger.info('Generating intelligent fallback response for transcription', { activeSkill }); - - // Simple heuristic to determine if message seems skill-related - const skillKeywords = { - 'dsa': ['algorithm', 'data structure', 'array', 'tree', 'graph', 'sort', 'search', 'complexity', 'big o'], - 'programming': ['code', 'function', 'variable', 'class', 'method', 'bug', 'debug', 'syntax'], - 'system-design': ['scalability', 'database', 'architecture', 'microservice', 'load balancer', 'cache'], - 'behavioral': ['interview', 'experience', 'situation', 'leadership', 'conflict', 'team'], - 'sales': ['customer', 'deal', 'negotiation', 'price', 'revenue', 'prospect'], - 'presentation': ['slide', 'audience', 'public speaking', 'presentation', 'nervous'], - 'data-science': ['data', 'model', 'machine learning', 'statistics', 'analytics', 'python', 'pandas'], - 'devops': ['deployment', 'ci/cd', 'docker', 'kubernetes', 'infrastructure', 'monitoring'], - 'negotiation': ['negotiate', 'compromise', 'agreement', 'terms', 'conflict resolution'] - }; - - const textLower = text.toLowerCase(); - const relevantKeywords = skillKeywords[activeSkill] || []; - const hasRelevantKeywords = relevantKeywords.some(keyword => textLower.includes(keyword)); - - // Check for question indicators - const questionIndicators = ['how', 'what', 'why', 'when', 'where', 'can you', 'could you', 'should i', '?']; - const seemsLikeQuestion = questionIndicators.some(indicator => textLower.includes(indicator)); - - let response; - if (hasRelevantKeywords || seemsLikeQuestion) { - response = `I'm having trouble processing that right now, but it sounds like a ${activeSkill} question. 
Could you rephrase or ask more specifically about what you need help with?`; - } else { - response = `Yeah, I'm listening. Ask your question relevant to ${activeSkill}.`; - } - + const response = `Yeah, I'm listening. Ask your question relevant to ${activeSkill}.`; return { response, metadata: { @@ -1018,62 +579,31 @@ Remember: Be intelligent about filtering - only provide detailed responses when } try { - // First check network connectivity - const networkCheck = await this.checkNetworkConnectivity(); - const hasNetworkIssues = networkCheck.tests.some(test => !test.success); - - if (hasNetworkIssues) { - logger.warn('Network connectivity issues detected', networkCheck); - } - - const testRequest = { - contents: [{ - role: 'user', - parts: [{ text: 'Test connection. Please respond with "OK".' }] - }] - }; - - this.applyGenerationDefaults(testRequest, { temperature: 0, maxOutputTokens: 10 }); - const startTime = Date.now(); - const result = await this.model.generateContent(testRequest); - const latency = Date.now() - startTime; - const { text } = this.extractTextFromCandidates(result.response); - - logger.info('Connection test successful', { - response: text, - latency, - networkCheck: hasNetworkIssues ? 'issues_detected' : 'healthy' + const response = await this.client.chat.completions.create({ + messages: [{ role: 'user', content: 'Test connection. Please respond with "OK".' 
}], + model: config.get('llm.groq.model') || 'llama-3.3-70b-versatile', + max_tokens: 10 }); + const latency = Date.now() - startTime; return { success: true, - response: text, - latency, - networkConnectivity: networkCheck + response: response.choices[0].message.content, + latency }; } catch (error) { - const errorAnalysis = this.analyzeError(error); - logger.error('Connection test failed', { - error: error.message, - errorAnalysis - }); - return { success: false, - error: error.message, - errorAnalysis, - networkConnectivity: await this.checkNetworkConnectivity().catch(() => null) + error: error.message }; } } updateApiKey(newApiKey) { - process.env.GEMINI_API_KEY = newApiKey; + process.env.GROQ_API_KEY = newApiKey; this.isInitialized = false; this.initializeClient(); - - logger.info('API key updated and client reinitialized'); } getStats() { @@ -1082,103 +612,13 @@ Remember: Be intelligent about filtering - only provide detailed responses when requestCount: this.requestCount, errorCount: this.errorCount, successRate: this.requestCount > 0 ? 
((this.requestCount - this.errorCount) / this.requestCount) * 100 : 0, - config: config.get('llm.gemini') + config: config.get('llm.groq') }; } delay(ms) { return new Promise(resolve => setTimeout(resolve, ms)); } - - async executeAlternativeRequest(geminiRequest) { - const https = require('https'); - const apiKey = config.getApiKey('GEMINI'); - const model = config.get('llm.gemini.model'); - - logger.info('Using alternative HTTPS request method'); - - const url = `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent`; - - const postData = JSON.stringify(geminiRequest); - - const agent = new https.Agent({ keepAlive: true, maxSockets: 1 }); - - const options = { - method: 'POST', - headers: { - 'Content-Type': 'application/json', - 'x-goog-api-key': apiKey, - 'Content-Length': Buffer.byteLength(postData), - 'User-Agent': this.getUserAgent() - }, - timeout: config.get('llm.gemini.timeout'), - agent - }; - - return new Promise((resolve, reject) => { - const req = https.request(url, options, (res) => { - let data = ''; - - res.on('data', (chunk) => { - data += chunk; - }); - - res.on('end', () => { - try { - if (res.statusCode !== 200) { - reject(new Error(`HTTP ${res.statusCode}: ${data}`)); - return; - } - - const response = JSON.parse(data); - - logger.debug('Alternative request response structure', { - hasResponse: !!response, - hasCandidates: !!response.candidates, - candidatesLength: response.candidates?.length, - responseKeys: Object.keys(response || {}), - firstCandidateKeys: response.candidates?.[0] ? 
Object.keys(response.candidates[0]) : [] - }); - - const { text, finishReason } = this.extractTextFromCandidates(response); - - if (finishReason === 'MAX_TOKENS') { - logger.warn('Gemini alternative response reached max tokens limit', { - finishReason - }); - } - - logger.info('Alternative request successful', { - responseLength: text.length, - statusCode: res.statusCode, - finishReason - }); - - resolve(text.trim()); - } catch (parseError) { - logger.error('Failed to parse alternative response', { - error: parseError.message, - rawResponse: data.substring(0, 500), - statusCode: res.statusCode - }); - reject(new Error(`Failed to parse response: ${parseError.message}`)); - } - }); - }); - - req.on('error', (error) => { - reject(new Error(`Alternative request failed: ${error.message}`)); - }); - - req.on('timeout', () => { - req.destroy(); - reject(new Error('Alternative request timeout')); - }); - - req.write(postData); - req.end(); - }); - } } module.exports = new LLMService(); \ No newline at end of file diff --git a/src/services/speech.service.js b/src/services/speech.service.js index f3716032..d285ad04 100644 --- a/src/services/speech.service.js +++ b/src/services/speech.service.js @@ -1,372 +1,16 @@ -// Enhanced polyfills for Azure Speech SDK in Node.js environment -if (typeof window === 'undefined') { - global.window = { - navigator: { - userAgent: 'Node.js', - platform: 'node', - mediaDevices: { - getUserMedia: () => Promise.resolve({ - getAudioTracks: () => [], - getTracks: () => [], - stop: () => {} - }), - getSupportedConstraints: () => ({ - audio: true, - video: false, - echoCancellation: true, - noiseSuppression: true, - autoGainControl: true, - sampleRate: true, - sampleSize: true, - channelCount: true - }), - enumerateDevices: () => Promise.resolve([ - { - deviceId: 'default', - kind: 'audioinput', - label: 'Default - Microphone', - groupId: 'default' - } - ]) - } - }, - document: { - createElement: (tagName) => { - const element = { - 
addEventListener: () => {}, - removeEventListener: () => {}, - setAttribute: () => {}, - getAttribute: () => null, - style: {}, - tagName: tagName.toUpperCase(), - nodeType: 1, - nodeName: tagName.toUpperCase(), - appendChild: () => {}, - removeChild: () => {}, - insertBefore: () => {}, - cloneNode: () => element, - hasAttribute: () => false, - removeAttribute: () => {}, - click: () => {}, - focus: () => {}, - blur: () => {} - }; - - // Special handling for audio elements - if (tagName.toLowerCase() === 'audio') { - Object.assign(element, { - play: () => Promise.resolve(), - pause: () => {}, - load: () => {}, - canPlayType: () => 'probably', - volume: 1, - muted: false, - paused: true, - ended: false, - currentTime: 0, - duration: 0, - playbackRate: 1, - defaultPlaybackRate: 1, - readyState: 4, - networkState: 1, - autoplay: false, - loop: false, - controls: false, - crossOrigin: null, - preload: 'metadata', - src: '', - currentSrc: '' - }); - } - - return element; - }, - getElementById: () => null, - getElementsByTagName: () => [], - getElementsByClassName: () => [], - querySelector: () => null, - querySelectorAll: () => [], - body: { - appendChild: () => {}, - removeChild: () => {}, - insertBefore: () => {}, - style: {} - }, - head: { - appendChild: () => {}, - removeChild: () => {}, - insertBefore: () => {}, - style: {} - } - }, - location: { - href: 'file:///', - protocol: 'file:', - host: '', - hostname: '', - port: '', - pathname: '/', - search: '', - hash: '', - origin: 'file://' - }, - addEventListener: () => {}, - removeEventListener: () => {}, - setTimeout: global.setTimeout, - clearTimeout: global.clearTimeout, - setInterval: global.setInterval, - clearInterval: global.clearInterval, - requestAnimationFrame: (callback) => global.setTimeout(callback, 16), - cancelAnimationFrame: global.clearTimeout, - // Add console methods if not available - console: global.console || { - log: () => {}, - error: () => {}, - warn: () => {}, - info: () => {}, - debug: () 
=> {} - }, - AudioContext: class AudioContext { - constructor() { - this.state = 'running'; - this.sampleRate = 16000; - this.currentTime = 0; - this.listener = { - setPosition: () => {}, - setOrientation: () => {} - }; - this.destination = { - connect: () => {}, - disconnect: () => {}, - channelCount: 2, - channelCountMode: 'explicit', - channelInterpretation: 'speakers' - }; - } - createMediaStreamSource(stream) { - return { - connect: () => {}, - disconnect: () => {}, - mediaStream: stream - }; - } - createGain() { - return { - connect: () => {}, - disconnect: () => {}, - gain: { - value: 1, - setValueAtTime: () => {}, - linearRampToValueAtTime: () => {}, - exponentialRampToValueAtTime: () => {} - } - }; - } - createScriptProcessor(bufferSize = 4096, inputChannels = 1, outputChannels = 1) { - return { - connect: () => {}, - disconnect: () => {}, - onaudioprocess: null, - bufferSize, - numberOfInputs: inputChannels, - numberOfOutputs: outputChannels - }; - } - createAnalyser() { - return { - connect: () => {}, - disconnect: () => {}, - fftSize: 2048, - frequencyBinCount: 1024, - minDecibels: -100, - maxDecibels: -30, - smoothingTimeConstant: 0.8, - getByteFrequencyData: () => {}, - getByteTimeDomainData: () => {}, - getFloatFrequencyData: () => {}, - getFloatTimeDomainData: () => {} - }; - } - decodeAudioData(audioData) { - return Promise.resolve({ - length: 44100, - sampleRate: 44100, - numberOfChannels: 1, - duration: 1, - getChannelData: () => new Float32Array(44100) - }); - } - suspend() { - this.state = 'suspended'; - return Promise.resolve(); - } - resume() { - this.state = 'running'; - return Promise.resolve(); - } - close() { - this.state = 'closed'; - return Promise.resolve(); - } - }, - webkitAudioContext: class webkitAudioContext { - constructor() { - this.state = 'running'; - this.sampleRate = 16000; - this.currentTime = 0; - this.listener = { - setPosition: () => {}, - setOrientation: () => {} - }; - this.destination = { - connect: () => {}, - 
disconnect: () => {}, - channelCount: 2, - channelCountMode: 'explicit', - channelInterpretation: 'speakers' - }; - } - createMediaStreamSource(stream) { - return { - connect: () => {}, - disconnect: () => {}, - mediaStream: stream - }; - } - createGain() { - return { - connect: () => {}, - disconnect: () => {}, - gain: { - value: 1, - setValueAtTime: () => {}, - linearRampToValueAtTime: () => {}, - exponentialRampToValueAtTime: () => {} - } - }; - } - createScriptProcessor(bufferSize = 4096, inputChannels = 1, outputChannels = 1) { - return { - connect: () => {}, - disconnect: () => {}, - onaudioprocess: null, - bufferSize, - numberOfInputs: inputChannels, - numberOfOutputs: outputChannels - }; - } - createAnalyser() { - return { - connect: () => {}, - disconnect: () => {}, - fftSize: 2048, - frequencyBinCount: 1024, - minDecibels: -100, - maxDecibels: -30, - smoothingTimeConstant: 0.8, - getByteFrequencyData: () => {}, - getByteTimeDomainData: () => {}, - getFloatFrequencyData: () => {}, - getFloatTimeDomainData: () => {} - }; - } - decodeAudioData(audioData) { - return Promise.resolve({ - length: 44100, - sampleRate: 44100, - numberOfChannels: 1, - duration: 1, - getChannelData: () => new Float32Array(44100) - }); - } - suspend() { - this.state = 'suspended'; - return Promise.resolve(); - } - resume() { - this.state = 'running'; - return Promise.resolve(); - } - close() { - this.state = 'closed'; - return Promise.resolve(); - } - }, - // Add additional globals that might be needed - URL: class URL { - constructor(url, base) { - this.href = url; - this.protocol = 'https:'; - this.host = 'localhost'; - this.hostname = 'localhost'; - this.port = ''; - this.pathname = '/'; - this.search = ''; - this.hash = ''; - this.origin = 'https://localhost'; - } - toString() { return this.href; } - }, - Blob: class Blob { - constructor(parts = [], options = {}) { - this.size = 0; - this.type = options.type || ''; - this.parts = parts; - } - slice() { return new Blob(); } - 
stream() { return new ReadableStream(); } - text() { return Promise.resolve(''); } - arrayBuffer() { return Promise.resolve(new ArrayBuffer(0)); } - }, - File: class File { - constructor(parts, name, options = {}) { - this.name = name; - this.size = 0; - this.type = options.type || ''; - this.lastModified = Date.now(); - this.parts = parts; - } - slice() { return new File([], this.name); } - stream() { return new ReadableStream(); } - text() { return Promise.resolve(''); } - arrayBuffer() { return Promise.resolve(new ArrayBuffer(0)); } - } - }; - global.document = global.window.document; - global.navigator = global.window.navigator; - global.AudioContext = global.window.AudioContext; - global.webkitAudioContext = global.window.webkitAudioContext; - global.URL = global.window.URL; - global.Blob = global.window.Blob; - global.File = global.window.File; - - // Additional polyfills that might be needed - if (!global.performance) { - global.performance = { - now: () => Date.now(), - mark: () => {}, - measure: () => {}, - clearMarks: () => {}, - clearMeasures: () => {}, - getEntriesByName: () => [], - getEntriesByType: () => [] - }; - } - - if (!global.crypto) { - global.crypto = { - getRandomValues: (arr) => { - for (let i = 0; i < arr.length; i++) { - arr[i] = Math.floor(Math.random() * 256); - } - return arr; - } - }; - } -} - -const sdk = require('microsoft-cognitiveservices-speech-sdk'); -const recorder = require('node-record-lpcm16'); +/** + * speech.service.js — Thin IPC wrapper that delegates all Azure Speech SDK + * work to speech-worker.js (a forked child process). + * + * Why: The Azure Speech SDK's native networking uses a TLS stack that + * conflicts with Electron/Chromium's boringssl, producing + * CERTIFICATE_VERIFY_FAILED errors and crashing the app on Alt+R. + * By running the SDK in a pure Node child process we avoid + * Chromium's socket layer entirely. 
+ */ + +const { fork } = require('child_process'); +const path = require('path'); const { EventEmitter } = require('events'); const logger = require('../core/logger').createServiceLogger('SPEECH'); const config = require('../core/config'); @@ -374,604 +18,268 @@ const config = require('../core/config'); class SpeechService extends EventEmitter { constructor() { super(); - this.recognizer = null; + this.worker = null; this.isRecording = false; - this.audioConfig = null; - this.speechConfig = null; - this.sessionStartTime = null; - this.retryCount = 0; - this.maxRetries = 3; - this.pushStream = null; - this.recording = null; - this.available = false; // track availability - - this.initializeClient(); + this.available = false; + this._workerReady = false; + this._pendingStatusCallbacks = []; + this._lastStatus = { + isRecording: false, + isInitialized: false, + sessionDuration: 0, + retryCount: 0 + }; + + this._spawnWorker(); } - initializeClient() { + // ── Worker lifecycle ────────────────────────────────────────────────── + + _spawnWorker() { + const workerPath = path.join(__dirname, '../../speech-worker.js'); + try { - // Get Azure Speech credentials from environment variables - const subscriptionKey = process.env.AZURE_SPEECH_KEY; - const region = process.env.AZURE_SPEECH_REGION; - - if (!subscriptionKey || !region) { - const reason = 'Azure Speech credentials not found. 
Speech recognition disabled.'; - logger.warn('Speech service disabled (missing credentials)'); + this.worker = fork(workerPath, [], { + // No special env manipulation — pure Node TLS + stdio: ['pipe', 'pipe', 'pipe', 'ipc'], + // Ensure the worker doesn't inherit Electron's altered env + env: { + ...process.env, + ELECTRON_RUN_AS_NODE: '1' // Forces pure Node.js mode + } + }); + + this.worker.on('message', (msg) => this._handleWorkerMessage(msg)); + + this.worker.on('error', (err) => { + logger.error('Speech worker process error', { error: err.message }); this.available = false; - this.emit('status', reason); - return; - } + this.isRecording = false; + this.emit('error', `Speech worker error: ${err.message}`); + }); - // Validate region format - const validRegions = ['eastus', 'westus', 'westus2', 'eastus2', 'centralus', 'northcentralus', 'southcentralus', 'westcentralus', 'canadacentral', 'canadaeast', 'brazilsouth', 'northeurope', 'westeurope', 'uksouth', 'ukwest', 'francecentral', 'germanywestcentral', 'norwayeast', 'switzerlandnorth', 'switzerlandwest', 'swedencentral', 'uaenorth', 'southafricanorth', 'centralindia', 'southindia', 'westindia', 'eastasia', 'southeastasia', 'japaneast', 'japanwest', 'koreacentral', 'koreasouth', 'australiaeast', 'australiasoutheast']; - - if (!validRegions.includes(region.toLowerCase())) { - logger.warn('Potentially invalid Azure region specified', { region }); - } + this.worker.on('exit', (code, signal) => { + logger.warn('Speech worker exited', { code, signal }); + this._workerReady = false; + this.isRecording = false; + + // Auto-restart the worker after a short delay (unless the app is quitting) + if (!this._shuttingDown) { + setTimeout(() => { + logger.info('Restarting speech worker'); + this._spawnWorker(); + }, 2000); + } + }); - // Initialize Azure Speech configuration - this.speechConfig = sdk.SpeechConfig.fromSubscription(subscriptionKey, region); - - // Configure speech recognition settings with better defaults - const 
azureConfig = config.get('speech.azure') || {}; - this.speechConfig.speechRecognitionLanguage = azureConfig.language || 'en-US'; - this.speechConfig.outputFormat = sdk.OutputFormat.Detailed; - - // Set additional properties for better recognition - this.speechConfig.setProperty(sdk.PropertyId.SpeechServiceConnection_InitialSilenceTimeoutMs, "5000"); - this.speechConfig.setProperty(sdk.PropertyId.SpeechServiceConnection_EndSilenceTimeoutMs, "2000"); - this.speechConfig.setProperty(sdk.PropertyId.Speech_SegmentationSilenceTimeoutMs, "2000"); - - if (azureConfig.enableDictation) { - this.speechConfig.enableDictation(); + // Capture worker stdout/stderr for logging + if (this.worker.stdout) { + this.worker.stdout.on('data', (data) => { + logger.debug('Worker stdout: ' + data.toString().trim()); + }); } - - if (azureConfig.enableAudioLogging) { - this.speechConfig.enableAudioLogging(); + if (this.worker.stderr) { + this.worker.stderr.on('data', (data) => { + logger.warn('Worker stderr: ' + data.toString().trim()); + }); } - - logger.info('Azure Speech service initialized successfully', { - region, - language: azureConfig.language || 'en-US' + + // Send init message with credentials and config + const subscriptionKey = process.env.AZURE_SPEECH_KEY; + const region = process.env.AZURE_SPEECH_REGION; + const azureConfig = config.get('speech.azure') || {}; + + this.worker.send({ + type: 'init', + config: { + subscriptionKey, + region, + azure: azureConfig + } }); - - this.available = true; - this.emit('status', 'Azure Speech Services ready'); - + + logger.info('Speech worker spawned', { pid: this.worker.pid }); } catch (error) { - logger.error('Failed to initialize Azure Speech client', { error: error.message, stack: error.stack }); + logger.error('Failed to spawn speech worker', { error: error.message, stack: error.stack }); this.available = false; - this.emit('status', 'Speech recognition unavailable'); + this.emit('status', 'Speech recognition unavailable (worker failed 
to start)'); } } - startRecording() { + _sendToWorker(message) { + if (!this.worker || !this.worker.connected) { + logger.warn('Cannot send to worker — not connected', { type: message.type }); + return false; + } try { - if (!this.speechConfig) { - const errorMsg = 'Azure Speech client not initialized'; - logger.error(errorMsg); - this.emit('error', errorMsg); - return; - } - - if (this.isRecording) { - logger.warn('Recording already in progress'); - return; - } - - this.sessionStartTime = Date.now(); - this.retryCount = 0; - - this._attemptRecording(); + this.worker.send(message); + return true; } catch (error) { - logger.error('Critical error in startRecording', { error: error.message, stack: error.stack }); - this.emit('error', `Speech recognition failed to start: ${error.message}`); - this.isRecording = false; + logger.error('Error sending message to worker', { error: error.message, type: message.type }); + return false; } } - _attemptRecording() { - try { - this.isRecording = true; - this.emit('recording-started'); - - // Clean up any existing resources - this._cleanup(); - - // Use push stream with Node.js audio capture (more reliable for Electron main process) - try { - this.pushStream = sdk.AudioInputStream.createPushStream(); - this.audioConfig = sdk.AudioConfig.fromStreamInput(this.pushStream); - - // Start capturing real microphone audio - this._startMicrophoneCapture(); - - } catch (audioError) { - logger.error('Failed to create audio config', { error: audioError.message }); - this.emit('error', 'Audio configuration failed. 
Please check microphone permissions.'); - this.isRecording = false; - return; - } - - // Create speech recognizer - try { - this.recognizer = new sdk.SpeechRecognizer(this.speechConfig, this.audioConfig); - } catch (recognizerError) { - throw recognizerError; - } - - // Set up event handlers with better error handling - this.recognizer.recognizing = (s, e) => { - try { - if (e.result.reason === sdk.ResultReason.RecognizingSpeech) { - logger.debug('Interim transcription received', { - text: e.result.text, - offset: e.result.offset, - duration: e.result.duration - }); - this.emit('interim-transcription', e.result.text); - } - } catch (error) { - logger.error('Error in recognizing handler', { error: error.message }); - } - }; - - this.recognizer.recognized = (s, e) => { - try { - if (e.result.reason === sdk.ResultReason.RecognizedSpeech) { - const sessionDuration = Date.now() - this.sessionStartTime; - - // Only emit transcription if there's actual text content - if (e.result.text && e.result.text.trim().length > 0) { - logger.info('Final transcription received', { - text: e.result.text, - sessionDuration: `${sessionDuration}ms`, - textLength: e.result.text.length, - confidence: e.result.properties?.getProperty(sdk.PropertyId.SpeechServiceResponse_JsonResult) - }); - - this.emit('transcription', e.result.text); - } else { - logger.debug('Empty transcription result ignored', { - sessionDuration: `${sessionDuration}ms`, - confidence: e.result.properties?.getProperty(sdk.PropertyId.SpeechServiceResponse_JsonResult) - }); - } - } else if (e.result.reason === sdk.ResultReason.NoMatch) { - logger.debug('No speech pattern detected in audio'); - - // Check if there's detailed no-match information - const noMatchDetails = e.result.properties?.getProperty(sdk.PropertyId.SpeechServiceResponse_JsonResult); - if (noMatchDetails) { - logger.debug('No match details', { details: noMatchDetails }); - } - } - } catch (error) { - logger.error('Error in recognized handler', { error: 
error.message }); - } - }; - - this.recognizer.canceled = (s, e) => { - logger.warn('Recognition session canceled', { - reason: e.reason, - errorCode: e.errorCode, - errorDetails: e.errorDetails - }); - - if (e.reason === sdk.CancellationReason.Error) { - const errorMsg = `Recognition error: ${e.errorDetails}`; - - // Check for specific error types and provide better messages - if (e.errorDetails.includes('1006')) { - this.emit('error', 'Network connection failed. Please check your internet connection.'); - } else if (e.errorDetails.includes('InvalidServiceCredentials')) { - this.emit('error', 'Invalid Azure Speech credentials. Please check AZURE_SPEECH_KEY and AZURE_SPEECH_REGION.'); - } else if (e.errorDetails.includes('Forbidden')) { - this.emit('error', 'Access denied. Please check your Azure Speech service subscription and region.'); - } else if (e.errorDetails.includes('AudioInputMicrophone_InitializationFailure')) { - this.emit('error', 'Microphone initialization failed. Please check microphone permissions and availability.'); + // ── Handle messages from worker ─────────────────────────────────────── + + _handleWorkerMessage(msg) { + switch (msg.type) { + case 'init-result': + this.available = !!msg.available; + this._workerReady = true; + if (msg.available) { + logger.info('Azure Speech service initialized in worker'); + this.emit('status', 'Azure Speech Services ready'); + } else { + logger.warn('Speech service unavailable', { reason: msg.reason }); + this.emit('status', msg.reason || 'Speech recognition unavailable'); + } + break; + + case 'recording-started': + this.isRecording = true; + this.emit('recording-started'); + break; + + case 'recording-stopped': + this.isRecording = false; + this.emit('recording-stopped'); + this.emit('status', 'Recording stopped'); + break; + + case 'transcription': + this.emit('transcription', msg.text); + break; + + case 'interim-transcription': + this.emit('interim-transcription', msg.text); + break; + + case 'error': + 
logger.error('Speech worker reported error', { error: msg.error }); + this.emit('error', msg.error); + break; + + case 'fatal-error': + logger.error('Speech worker reported FATAL error — stopping gracefully', { error: msg.error }); + this.isRecording = false; + this.emit('error', msg.error); + this.emit('recording-stopped'); + break; + + case 'session-started': + logger.info('Recognition session started', { sessionId: msg.sessionId }); + break; + + case 'session-stopped': + logger.info('Recognition session ended', { sessionId: msg.sessionId }); + break; + + case 'status': + this._lastStatus = msg.status || this._lastStatus; + // Resolve any pending status callbacks + while (this._pendingStatusCallbacks.length > 0) { + const cb = this._pendingStatusCallbacks.shift(); + cb(this._lastStatus); + } + break; + + case 'test-result': + // Handled by testConnection promise + if (this._testResolve) { + this._testResolve(msg); + this._testResolve = null; + } + break; + + case 'log': + // Forward worker logs through the service logger + { + const level = msg.level || 'debug'; + const logMsg = msg.message || ''; + const logData = msg.data || {}; + if (logger[level]) { + logger[level](`[worker] ${logMsg}`, logData); } else { - this.emit('error', errorMsg); - } - - // Attempt retry for transient errors - if (this.retryCount < this.maxRetries && ( - e.errorDetails.includes('1006') || - e.errorDetails.includes('timeout') || - e.errorDetails.includes('network') - )) { - this.retryCount++; - logger.info(`Retrying recognition (attempt ${this.retryCount}/${this.maxRetries})`); - setTimeout(() => { - if (!this.isRecording) { - this._attemptRecording(); - } - }, 1000 * this.retryCount); - return; + logger.debug(`[worker] ${logMsg}`, logData); } } - this.stopRecording(); - }; - - this.recognizer.sessionStarted = (s, e) => { - logger.info('Recognition session started', { sessionId: e.sessionId }); - }; - - this.recognizer.sessionStopped = (s, e) => { - logger.info('Recognition session 
ended', { sessionId: e.sessionId }); - this.stopRecording(); - }; - - // Start continuous recognition with timeout - const startTimeout = setTimeout(() => { - logger.error('Recognition start timeout'); - this.emit('error', 'Speech recognition start timeout. Please try again.'); - this.stopRecording(); - }, 10000); // 10 second timeout - - this.recognizer.startContinuousRecognitionAsync( - () => { - clearTimeout(startTimeout); - logger.info('Continuous speech recognition started successfully'); - if (global.windowManager) { - global.windowManager.handleRecordingStarted(); - } - }, - (error) => { - clearTimeout(startTimeout); - logger.error('Failed to start continuous recognition', { - error: error.toString(), - retryCount: this.retryCount - }); - - // Attempt retry for initialization failures - if (this.retryCount < this.maxRetries) { - this.retryCount++; - logger.info(`Retrying recognition start (attempt ${this.retryCount}/${this.maxRetries})`); - this.isRecording = false; - setTimeout(() => { - this._attemptRecording(); - }, 2000 * this.retryCount); - } else { - this.emit('error', `Recognition startup failed after ${this.maxRetries} attempts: ${error}`); - this.isRecording = false; - } - } - ); + break; - } catch (error) { - logger.error('Failed to start recording session', { - error: error.message, - stack: error.stack - }); - this.emit('error', `Recording startup failed: ${error.message}`); - this.isRecording = false; + default: + logger.debug('Unknown worker message type', { type: msg.type }); } } - stopRecording() { - if (!this.isRecording) { - return; - } + // ── Public API (matches the old SpeechService interface) ────────────── - this.isRecording = false; - const sessionDuration = this.sessionStartTime ? 
Date.now() - this.sessionStartTime : 0; - - logger.info('Stopping speech recognition session', { - sessionDuration: `${sessionDuration}ms` - }); - - // Stop continuous recognition - if (this.recognizer) { - try { - this.recognizer.stopContinuousRecognitionAsync( - () => { - logger.info('Speech recognition stopped successfully'); - this.emit('recording-stopped'); - this.emit('status', 'Recording stopped'); - if (global.windowManager) { - global.windowManager.handleRecordingStopped(); - } - this._cleanup(); - }, - (error) => { - logger.error('Error during recognition stop', { error: error.toString() }); - this._cleanup(); - } - ); - } catch (error) { - logger.error('Error stopping recognizer', { error: error.message }); - this._cleanup(); - } - } else { - this._cleanup(); + startRecording() { + if (!this.available) { + const errorMsg = 'Azure Speech client not initialized'; + logger.error(errorMsg); + this.emit('error', errorMsg); + return; } - } - - _cleanup() { - // Clean up recognizer - if (this.recognizer) { - try { - this.recognizer.close(); - } catch (error) { - logger.error('Error closing recognizer', { error: error.message }); - } - this.recognizer = null; + if (this.isRecording) { + logger.warn('Recording already in progress'); + return; } - - // Clean up audio config - if (this.audioConfig) { - try { - // Check if close method exists and call it appropriately - if (typeof this.audioConfig.close === 'function') { - try { - const closeResult = this.audioConfig.close(); - // If it returns a promise, handle it, otherwise just continue - if (closeResult && typeof closeResult.then === 'function') { - // It's a promise, but we don't need to wait for it in cleanup - closeResult.catch((error) => { - logger.error('Error closing audio config', { error: error.message }); - }); - } - } catch (closeError) { - logger.error('Error closing audio config', { error: closeError.message }); - } - } - } catch (error) { - logger.error('Error closing audio config', { error: 
error.message }); - } - this.audioConfig = null; - } - - // Stop audio recording - if (this.recording) { - try { - this.recording.stop(); - this.recording = null; - } catch (error) { - logger.error('Error stopping audio recording', { error: error.message }); - } - } - - // Clean up push stream - if (this.pushStream) { - try { - // Check if close method exists and call it appropriately - if (typeof this.pushStream.close === 'function') { - const closeResult = this.pushStream.close(); - // If it returns a promise, we can await it, otherwise just continue - if (closeResult && typeof closeResult.then === 'function') { - // It's a promise, but we don't need to wait for it in cleanup - closeResult.catch((error) => { - }); - } - } - } catch (error) { - logger.error('Error closing push stream', { error: error.message }); - } - this.pushStream = null; - } - - // Reset audio data logging flag - this._audioDataLogged = false; + this._sendToWorker({ type: 'start' }); } - async recognizeFromFile(audioFilePath) { - if (!this.speechConfig) { - throw new Error('Speech service not initialized'); - } - - const startTime = Date.now(); - - try { - // Validate file exists and is readable - const fs = require('fs'); - if (!fs.existsSync(audioFilePath)) { - throw new Error(`Audio file not found: ${audioFilePath}`); - } - - const audioConfig = sdk.AudioConfig.fromWavFileInput(audioFilePath); - const recognizer = new sdk.SpeechRecognizer(this.speechConfig, audioConfig); - - const result = await new Promise((resolve, reject) => { - const timeout = setTimeout(() => { - reject(new Error('File recognition timeout')); - recognizer.close(); - }, 30000); // 30 second timeout - - recognizer.recognizeOnceAsync( - (result) => { - clearTimeout(timeout); - if (result.reason === sdk.ResultReason.RecognizedSpeech) { - resolve(result.text); - } else if (result.reason === sdk.ResultReason.NoMatch) { - resolve(''); // No speech detected in file - } else { - reject(new Error(`File recognition failed: 
${result.reason}`)); - } - recognizer.close(); - audioConfig.close(); - }, - (error) => { - clearTimeout(timeout); - reject(new Error(`File recognition error: ${error}`)); - recognizer.close(); - audioConfig.close(); - } - ); - }); - - logger.logPerformance('File speech recognition', startTime, { - filePath: audioFilePath, - textLength: result.length - }); - - return result; - } catch (error) { - logger.error('File recognition failed', { - filePath: audioFilePath, - error: error.message - }); - throw error; - } + stopRecording() { + if (!this.isRecording) return; + this._sendToWorker({ type: 'stop' }); } getStatus() { + // Return the last known status synchronously (for backward compat) return { isRecording: this.isRecording, - isInitialized: !!this.speechConfig, - sessionDuration: this.sessionStartTime ? Date.now() - this.sessionStartTime : 0, - retryCount: this.retryCount, + isInitialized: this.available, + sessionDuration: this._lastStatus.sessionDuration || 0, + retryCount: this._lastStatus.retryCount || 0, config: config.get('speech.azure') || {} }; } - // Test connection method - async testConnection() { - if (!this.speechConfig) { - throw new Error('Speech service not initialized'); - } - - try { - // Create a simple test recognizer - const audioConfig = sdk.AudioConfig.fromDefaultMicrophoneInput(); - const recognizer = new sdk.SpeechRecognizer(this.speechConfig, audioConfig); - - // Test by attempting to create the recognizer (this validates credentials) - recognizer.close(); - audioConfig.close(); - - return { success: true, message: 'Connection test successful' }; - } catch (error) { - return { success: false, message: error.message }; - } - } - - // Start capturing real microphone audio using node-record-lpcm16 - _startMicrophoneCapture() { - if (!this.pushStream) return; - - try { - // Check if recorder is available - if (!recorder || typeof recorder.record !== 'function') { - throw new Error('node-record-lpcm16 not available or not properly 
installed'); - } - - // Configure audio recording with error handling - this.recording = recorder.record({ - sampleRateHertz: 16000, // Azure Speech SDK prefers 16kHz - threshold: 0, // No silence threshold - verbose: false, // Quiet logging - recordProgram: 'sox', // Try 'sox' first (most common on macOS) - silence: '10.0s' // Longer silence threshold - }); - - if (!this.recording) { - throw new Error('Failed to create audio recording instance'); - } - - // Add error handler for the recording stream before using it - this.recording.stream().on('error', (error) => { - logger.error('Audio recording stream error', { error: error.message }); - - // Don't emit error immediately, try to recover - this._handleAudioError(error); - }); - - // Pipe audio data to Azure Speech SDK - this.recording.stream().on('data', (chunk) => { - if (this.pushStream && this.isRecording) { - try { - this.pushStream.write(chunk); - // Console log only first few chunks to avoid spam - if (!this._audioDataLogged) { - this._audioDataLogged = true; - } - } catch (error) { - } - } - }); - - } catch (error) { - logger.error('Failed to start microphone capture', { error: error.message, stack: error.stack }); - - // Fall back to no audio capture (Azure SDK will still work without audio) - this.emit('error', `Microphone capture failed: ${error.message}. 
Speech recognition may not work properly.`); - } - } - - // Handle audio recording errors with recovery attempts - _handleAudioError(error) { - - // Try to restart recording with different program - if (this.recording) { - try { - this.recording.stop(); - } catch (stopError) { - } - this.recording = null; - } - - // Try with different recording program - setTimeout(() => { - if (this.isRecording) { - this._startMicrophoneCaptureWithFallback(); - } - }, 1000); - } - - // Try microphone capture with different programs as fallback - _startMicrophoneCaptureWithFallback() { - const programs = ['sox', 'rec', 'arecord']; - let currentProgramIndex = 0; - - const tryNextProgram = () => { - if (currentProgramIndex >= programs.length) { - this.emit('error', 'Could not start microphone capture with any audio program'); - return; - } - - const program = programs[currentProgramIndex]; - - try { - this.recording = recorder.record({ - sampleRateHertz: 16000, - threshold: 0, - verbose: false, - recordProgram: program, - silence: '10.0s' - }); - - this.recording.stream().on('error', (error) => { - currentProgramIndex++; - tryNextProgram(); - }); - - this.recording.stream().on('data', (chunk) => { - if (this.pushStream && this.isRecording) { - try { - this.pushStream.write(chunk); - if (!this._audioDataLogged) { - this._audioDataLogged = true; - } - } catch (error) { - logger.error('Error writing audio data', { error: error.message }); - } - } - }); - } catch (error) { - logger.error(`${program} configuration failed`, { error: error.message }); - currentProgramIndex++; - tryNextProgram(); - } - }; - - tryNextProgram(); - } - - // Expose availability to UI + async testConnection() { + if (!this.available) { + return { success: false, message: 'Speech service not initialized' }; + } + return new Promise((resolve) => { + this._testResolve = resolve; + const sent = this._sendToWorker({ type: 'test' }); + if (!sent) { + resolve({ success: false, message: 'Worker not connected' }); + } + 
// Timeout after 5 seconds + setTimeout(() => { + if (this._testResolve === resolve) { + this._testResolve = null; + resolve({ success: false, message: 'Test timed out' }); + } + }, 5000); + }); + } + isAvailable() { - return !!this.speechConfig && !!this.available; + return this.available; + } + + // ── Shutdown ────────────────────────────────────────────────────────── + + shutdown() { + this._shuttingDown = true; + if (this.worker && this.worker.connected) { + this._sendToWorker({ type: 'shutdown' }); + // Give the worker a moment, then force-kill + setTimeout(() => { + if (this.worker && !this.worker.killed) { + this.worker.kill(); + } + }, 2000); + } } } diff --git a/src/ui/main-window.js b/src/ui/main-window.js index 8c6b16e5..8a72abad 100644 --- a/src/ui/main-window.js +++ b/src/ui/main-window.js @@ -259,14 +259,15 @@ class MainWindowUI { this.skillIndicator = document.getElementById('skillIndicator'); this.settingsIndicator = document.getElementById('settingsIndicator'); // Optional this.micButton = document.getElementById('micButton'); - this.infoButton = document.getElementById('infoButton'); - this.shortcutsPopover = document.getElementById('shortcutsPopover'); + this.uploadButton = document.getElementById('uploadButton'); + this.infoButton = document.getElementById('infoButton'); + this.shortcutsPopover = document.getElementById('shortcutsPopover'); // NEW: Screenshot button is the first .command-item without id const commandItems = document.querySelectorAll('.command-item'); this.screenshotButton = commandItems && commandItems[0]; - if (!this.statusDot || !this.skillIndicator || !this.micButton || !this.screenshotButton) { + if (!this.statusDot || !this.skillIndicator || !this.micButton || !this.screenshotButton) { throw new Error('Required UI elements not found'); } @@ -309,6 +310,46 @@ class MainWindowUI { } } }); + + // Add click handler for document upload + if (this.uploadButton) { + this.uploadButton.addEventListener('click', async () => { + if 
(!this.isInteractive) return; + + // Show a loading/processing visual indicator + this.uploadButton.style.opacity = '0.5'; + + if (window.electronAPI && window.electronAPI.uploadDocument) { + try { + const result = await window.electronAPI.uploadDocument(); + if (result && result.success) { + // Turn green briefly to show success + this.uploadButton.style.color = '#4caf50'; + setTimeout(() => { + this.uploadButton.style.color = ''; + }, 2000); + } else if (result && result.canceled) { + // Do nothing if canceled + } else { + // Turn red briefly to show error + this.uploadButton.style.color = '#ff4757'; + setTimeout(() => { + this.uploadButton.style.color = ''; + }, 2000); + } + } catch (error) { + logger.error('Upload failed', error); + this.uploadButton.style.color = '#ff4757'; + setTimeout(() => { + this.uploadButton.style.color = ''; + }, 2000); + } + } + + // Restore opacity + this.uploadButton.style.opacity = '1'; + }); + } // Language dropdown this.languageSelect = document.getElementById('codingLanguage'); From 20f6488f31800044e9fb8883dc024cc65731b33f Mon Sep 17 00:00:00 2001 From: Matthias Date: Wed, 3 Jun 2026 10:34:01 +0100 Subject: [PATCH 2/9] Implement multi-key rotation and failover for Groq API in LLMService --- src/services/llm.service.js | 98 +++++++++++++++++++++++++------------ 1 file changed, 68 insertions(+), 30 deletions(-) diff --git a/src/services/llm.service.js b/src/services/llm.service.js index b87c0ff6..5a929126 100644 --- a/src/services/llm.service.js +++ b/src/services/llm.service.js @@ -5,7 +5,8 @@ const { promptLoader } = require('../../prompt-loader'); class LLMService { constructor() { - this.client = null; + this.clients = []; + this.currentClientIndex = 0; this.isInitialized = false; this.requestCount = 0; this.errorCount = 0; @@ -14,24 +15,27 @@ class LLMService { } initializeClient() { - const apiKey = config.getApiKey('GROQ'); + const apiKeyString = config.getApiKey('GROQ') || ''; + const apiKeys = apiKeyString.split(',').map(k 
=> k.trim()).filter(k => k && k !== 'your-api-key-here' && k !== 'your_groq_api_key_here'); - if (!apiKey || apiKey === 'your-api-key-here') { + if (apiKeys.length === 0) { logger.warn('Groq API key not configured', { - keyExists: !!apiKey + keyExists: false }); return; } try { - this.client = new Groq({ apiKey }); + this.clients = apiKeys.map(apiKey => new Groq({ apiKey })); + this.currentClientIndex = 0; this.isInitialized = true; - logger.info('Groq AI client initialized successfully', { + logger.info('Groq AI clients initialized successfully', { + keyCount: apiKeys.length, model: config.get('llm.groq.model') }); } catch (error) { - logger.error('Failed to initialize Groq client', { + logger.error('Failed to initialize Groq clients', { error: error.message }); } @@ -420,45 +424,78 @@ You MUST keep your answers extremely concise. Respond with exactly 1 to 3 short ...this.getGenerationConfig() }; + let lastError = null; + // Try each model instantly on rate limit — zero delay rotation for (let i = 0; i < modelPool.length; i++) { payload.model = modelPool[i]; - try { - const response = await this.client.chat.completions.create(payload); + + // Try each API key for the current model + for (let j = 0; j < this.clients.length; j++) { + const clientIndex = (this.currentClientIndex + j) % this.clients.length; + const currentClient = this.clients[clientIndex]; - if (!response.choices || response.choices.length === 0) { - throw new Error('Empty response from Groq API'); + try { + const response = await currentClient.chat.completions.create(payload); + + if (!response.choices || response.choices.length === 0) { + throw new Error('Empty response from Groq API'); + } + + // Advance the starting index for the next global request to distribute load, or stay. We'll stay to maximize usage till rate limit. 
+ this.currentClientIndex = clientIndex; + + return response.choices[0].message.content; + } catch (error) { + lastError = error; + const errorInfo = this.analyzeError(error); + + logger.warn(`Groq model ${payload.model} failed on API key index ${clientIndex}`, { + error: error.message, + errorType: errorInfo.type, + model: payload.model, + keyIndex: clientIndex + }); + + // If rate limited, instantly try next key for the SAME model + if (errorInfo.type === 'RATE_LIMIT_ERROR') { + continue; + } + + // If auth error (e.g. invalid key), instantly try next key + if (errorInfo.type === 'AUTH_ERROR') { + continue; + } + + // For other errors (like model decommissioned), break inner loop to move to next model + break; } - - return response.choices[0].message.content; - } catch (error) { - const errorInfo = this.analyzeError(error); + } + + // If we got here, all keys for this model failed. + if (lastError) { + const errorInfo = this.analyzeError(lastError); - logger.warn(`Groq model ${payload.model} failed`, { - error: error.message, - errorType: errorInfo.type, - model: payload.model, - remainingModels: modelPool.length - i - 1 - }); - - // If rate limited, immediately try next model (no delay!) + // If rate limited across all keys, switch model instantly if (errorInfo.type === 'RATE_LIMIT_ERROR' && i < modelPool.length - 1) { - logger.info(`Rate limited on ${payload.model}, instantly switching to ${modelPool[i + 1]}`); - continue; // no delay, just try next model + logger.info(`Rate limited across all keys for ${payload.model}, instantly switching to ${modelPool[i + 1]}`); + continue; } - // For non-rate-limit errors, or if we've exhausted all models, throw + // If it's the last model, we throw if (i === modelPool.length - 1) { - throw new Error(`All Groq models exhausted: ${error.message}`); + throw new Error(`All Groq models and keys exhausted: ${lastError.message}`); } - // Small delay only for non-rate-limit errors (network issues etc.) 
+ // Small delay for network errors etc if (errorInfo.type !== 'RATE_LIMIT_ERROR') { const delay = 1000 + Math.random() * 500; await this.delay(delay); } } } + + throw new Error(`All Groq models and keys exhausted. Last error: ${lastError ? lastError.message : 'Unknown'}`); } async performPreflightCheck() { @@ -574,13 +611,14 @@ You MUST keep your answers extremely concise. Respond with exactly 1 to 3 short } async testConnection() { - if (!this.isInitialized) { + if (!this.isInitialized || !this.clients || this.clients.length === 0) { return { success: false, error: 'Service not initialized' }; } try { const startTime = Date.now(); - const response = await this.client.chat.completions.create({ + const currentClient = this.clients[this.currentClientIndex]; + const response = await currentClient.chat.completions.create({ messages: [{ role: 'user', content: 'Test connection. Please respond with "OK".' }], model: config.get('llm.groq.model') || 'llama-3.3-70b-versatile', max_tokens: 10 From 9d93a5fd4982b7154c44528afd2062ccda3b0111 Mon Sep 17 00:00:00 2001 From: Matthias Date: Wed, 3 Jun 2026 13:11:21 +0100 Subject: [PATCH 3/9] Migrate Azure Speech to Groq Whisper and fix prompt verbosity --- speech-worker.js | 477 +++++++-------------------------- src/services/llm.service.js | 9 +- src/services/speech.service.js | 9 +- 3 files changed, 112 insertions(+), 383 deletions(-) diff --git a/speech-worker.js b/speech-worker.js index 5df44efb..d6c6e2c9 100644 --- a/speech-worker.js +++ b/speech-worker.js @@ -1,469 +1,200 @@ -/** - * speech-worker.js — Pure Node.js worker for Azure Speech SDK - * - * This file runs as a forked child process (child_process.fork) so that - * the Azure Speech SDK's native networking uses Node's TLS stack instead - * of Electron/Chromium's boringssl, which was causing CERTIFICATE_VERIFY_FAILED - * errors and crashing the app when Alt+R was pressed. - * - * Audio capture: Uses `child_process.spawn` to capture audio via `sox`. 
- * On Windows it uses `-t waveaudio default`, and on other platforms `-d`. - * The raw PCM stream is written into the Azure SDK's PushAudioInputStream. - * - * Communication with the main process is via IPC messages: - * Main → Worker: { type: 'start' | 'stop' | 'test' | 'status' | 'init', ... } - * Worker → Main: { type: 'recording-started' | 'recording-stopped' | 'transcription' - * | 'interim-transcription' | 'error' | 'status' | 'canceled' - * | 'session-started' | 'session-stopped' | 'init-result' - * | 'log', ... } - */ - 'use strict'; -// ── Deps ──────────────────────────────────────────────────────────────── -const sdk = require('microsoft-cognitiveservices-speech-sdk'); +const fs = require('fs'); +const path = require('path'); const { spawn } = require('child_process'); +const Groq = require('groq-sdk'); -// ── State ─────────────────────────────────────────────────────────────── -let recognizer = null; -let pushStream = null; -let audioConfig = null; -let speechConfig = null; -let recordingProcess = null; +let groq = null; let isRecording = false; let sessionStartTime = null; -let retryCount = 0; -const maxRetries = 3; -let _audioDataLogged = false; +let recordingProcess = null; let available = false; +let retryCount = 0; -// ── Logging helper (sends to main process) ────────────────────────────── function log(level, message, data) { try { process.send({ type: 'log', level, message, data: data || {} }); } catch (_) { - // If IPC is broken, just write to stderr so we don't lose the info process.stderr.write(`[speech-worker] ${level}: ${message} ${JSON.stringify(data || {})}\n`); } } -// ── Initialisation ────────────────────────────────────────────────────── function initialize(config) { try { - const subscriptionKey = config.subscriptionKey; - const region = config.region; - - if (!subscriptionKey || !region) { + if (!config.groqKey) { available = false; - process.send({ type: 'init-result', available: false, reason: 'Missing Azure Speech credentials' 
}); + process.send({ type: 'init-result', available: false, reason: 'Missing GROQ_API_KEY' }); return; } - - speechConfig = sdk.SpeechConfig.fromSubscription(subscriptionKey, region); - - // Language & output format - const lang = (config.azure && config.azure.language) || 'en-US'; - speechConfig.speechRecognitionLanguage = lang; - speechConfig.outputFormat = sdk.OutputFormat.Detailed; - - // Timeouts - speechConfig.setProperty(sdk.PropertyId.SpeechServiceConnection_InitialSilenceTimeoutMs, '5000'); - speechConfig.setProperty(sdk.PropertyId.SpeechServiceConnection_EndSilenceTimeoutMs, '2000'); - speechConfig.setProperty(sdk.PropertyId.Speech_SegmentationSilenceTimeoutMs, '2000'); - - if (config.azure && config.azure.enableDictation) { - speechConfig.enableDictation(); - } - if (config.azure && config.azure.enableAudioLogging) { - speechConfig.enableAudioLogging(); - } - + groq = new Groq({ apiKey: config.groqKey }); available = true; - log('info', 'Azure Speech service initialized in worker', { region, language: lang }); + log('info', 'Groq SDK initialized in worker'); process.send({ type: 'init-result', available: true }); } catch (error) { available = false; - log('error', 'Failed to initialize Azure Speech client in worker', { error: error.message, stack: error.stack }); + log('error', 'Failed to initialize Groq SDK', { error: error.message }); process.send({ type: 'init-result', available: false, reason: error.message }); } } -// ── Cleanup ───────────────────────────────────────────────────────────── function cleanup() { - if (recognizer) { - try { recognizer.close(); } catch (_) {} - recognizer = null; - } - if (audioConfig) { - try { - if (typeof audioConfig.close === 'function') { - const r = audioConfig.close(); - if (r && typeof r.then === 'function') r.catch(() => {}); - } - } catch (_) {} - audioConfig = null; - } if (recordingProcess) { - try { recordingProcess.kill(); } catch (_) {} + try { recordingProcess.kill('SIGKILL'); } catch (_) {} 
recordingProcess = null; } - if (pushStream) { - try { - if (typeof pushStream.close === 'function') { - const r = pushStream.close(); - if (r && typeof r.then === 'function') r.catch(() => {}); - } - } catch (_) {} - pushStream = null; - } - _audioDataLogged = false; } -// ── Microphone capture ────────────────────────────────────────────────── -function startMicrophoneCapture() { - if (!pushStream) return; - - try { - const isWindows = process.platform === 'win32'; - const cmd = 'sox'; - let args = []; - - // sox format arguments: raw PCM, 16kHz, 16-bit, mono, signed integer - const formatArgs = ['-b', '16', '-e', 'signed', '-c', '1', '-r', '16000', '-t', 'raw', '-']; - - if (isWindows) { - // Windows needs waveaudio driver explicitly - args = ['-t', 'waveaudio', 'default', '-q', ...formatArgs]; - } else { - // Unix uses the default device flag - args = ['-d', '-q', ...formatArgs]; - } - - recordingProcess = spawn(cmd, args); - - recordingProcess.on('error', (error) => { - log('error', 'Failed to spawn sox', { error: error.message }); - process.send({ type: 'error', error: `Microphone capture failed (sox error): ${error.message}` }); - handleAudioError(); - }); - - recordingProcess.on('close', (code) => { - if (code !== 0 && code !== null && isRecording) { - log('warn', `sox exited with code ${code}`); - } - }); - - recordingProcess.stdout.on('data', (chunk) => { - if (pushStream && isRecording) { - try { - pushStream.write(chunk); - if (!_audioDataLogged) { - _audioDataLogged = true; - log('debug', 'First audio chunk received via sox', { size: chunk.length }); - } - } catch (err) { - log('error', 'Error writing audio data to push stream', { error: err.message }); - } - } - }); - - log('info', `Microphone capture started via sox (${isWindows ? 
'waveaudio' : 'default device'})`); - } catch (error) { - log('error', 'Failed to start microphone capture', { error: error.message, stack: error.stack }); - process.send({ type: 'error', error: `Microphone capture failed: ${error.message}` }); - handleAudioError(); +async function runRecordingLoop() { + if (!isRecording) return; + + const tempWavPath = path.join(__dirname, 'temp_audio.wav'); + const isWindows = process.platform === 'win32'; + const cmd = 'sox'; + let args = []; + + // sox format arguments: raw PCM, 16kHz, 16-bit, mono + // we wait for 0.1s of sound > 1%, then stop after 0.8s of silence < 1% + const formatArgs = ['-b', '16', '-e', 'signed', '-c', '1', '-r', '16000', tempWavPath, 'silence', '1', '0.1', '1%', '1', '0.8', '1%']; + + if (isWindows) { + args = ['-t', 'waveaudio', 'default', '-q', ...formatArgs]; + } else { + args = ['-d', '-q', ...formatArgs]; } -} -function handleAudioError() { - if (recordingProcess) { - try { recordingProcess.kill(); } catch (_) {} - recordingProcess = null; - } -} + recordingProcess = spawn(cmd, args); -// ── Start recording ───────────────────────────────────────────────────── -function startRecording() { - try { - if (!speechConfig) { - process.send({ type: 'error', error: 'Azure Speech client not initialized' }); - return; - } + recordingProcess.on('error', (error) => { + log('error', 'Failed to spawn sox', { error: error.message }); if (isRecording) { - log('warn', 'Recording already in progress'); - return; + process.send({ type: 'error', error: `Microphone capture failed (sox error): ${error.message}` }); + stopRecording(); } + }); - sessionStartTime = Date.now(); - retryCount = 0; - attemptRecording(); - } catch (error) { - log('error', 'Critical error in startRecording', { error: error.message, stack: error.stack }); - process.send({ type: 'error', error: `Speech recognition failed to start: ${error.message}` }); - isRecording = false; - } -} - -function attemptRecording() { - try { - isRecording = true; - 
process.send({ type: 'recording-started' }); - - cleanup(); + recordingProcess.on('close', async (code) => { + recordingProcess = null; + + if (!isRecording) return; - try { - pushStream = sdk.AudioInputStream.createPushStream(); - audioConfig = sdk.AudioConfig.fromStreamInput(pushStream); - startMicrophoneCapture(); - } catch (audioError) { - log('error', 'Failed to create audio config', { error: audioError.message }); - process.send({ type: 'error', error: 'Audio configuration failed.' }); - isRecording = false; - return; + if (code !== 0 && code !== null) { + log('warn', `sox exited with code ${code}`); } - // Create recognizer + // Process the file try { - recognizer = new sdk.SpeechRecognizer(speechConfig, audioConfig); - } catch (recErr) { - log('error', 'Failed to create speech recognizer', { error: recErr.message }); - process.send({ type: 'error', error: `Failed to create recognizer: ${recErr.message}` }); - isRecording = false; - cleanup(); - return; - } - - // ── Event handlers ────────────────────────────────────────────────── - recognizer.recognizing = (_s, e) => { - try { - if (e.result.reason === sdk.ResultReason.RecognizingSpeech) { - log('debug', 'Interim transcription', { text: e.result.text }); - process.send({ type: 'interim-transcription', text: e.result.text }); - } - } catch (err) { - log('error', 'Error in recognizing handler', { error: err.message }); - } - }; - - recognizer.recognized = (_s, e) => { - try { - if (e.result.reason === sdk.ResultReason.RecognizedSpeech) { - const dur = Date.now() - sessionStartTime; - if (e.result.text && e.result.text.trim().length > 0) { - log('info', 'Final transcription', { text: e.result.text, sessionDuration: `${dur}ms` }); - process.send({ type: 'transcription', text: e.result.text }); - } else { - log('debug', 'Empty transcription ignored'); + if (fs.existsSync(tempWavPath)) { + const stats = fs.statSync(tempWavPath); + if (stats.size > 2000) { // Check if it's not basically empty (WAV headers are ~44 
bytes) + log('debug', 'Uploading audio to Groq Whisper...', { size: stats.size }); + + process.send({ type: 'interim-transcription', text: 'Transcribing...' }); + + const transcription = await groq.audio.transcriptions.create({ + file: fs.createReadStream(tempWavPath), + model: 'whisper-large-v3-turbo', + response_format: 'text', + language: 'en' + }); + + if (transcription && transcription.trim().length > 0) { + const dur = Date.now() - sessionStartTime; + log('info', 'Final transcription', { text: transcription.trim(), sessionDuration: `${dur}ms` }); + process.send({ type: 'transcription', text: transcription.trim() }); } - } else if (e.result.reason === sdk.ResultReason.NoMatch) { - log('debug', 'No speech pattern detected'); - } - } catch (err) { - log('error', 'Error in recognized handler', { error: err.message }); - } - }; - - recognizer.canceled = (_s, e) => { - log('warn', 'Recognition canceled', { - reason: e.reason, - errorCode: e.errorCode, - errorDetails: e.errorDetails - }); - - if (e.reason === sdk.CancellationReason.Error) { - let userMsg; - if (e.errorDetails && e.errorDetails.includes('1006')) { - userMsg = 'Network connection failed. Please check your internet connection.'; - } else if (e.errorDetails && e.errorDetails.includes('InvalidServiceCredentials')) { - userMsg = 'Invalid Azure Speech credentials. Please check AZURE_SPEECH_KEY and AZURE_SPEECH_REGION.'; - } else if (e.errorDetails && e.errorDetails.includes('Forbidden')) { - userMsg = 'Access denied. Please check your Azure Speech service subscription and region.'; - } else if (e.errorDetails && e.errorDetails.includes('AudioInputMicrophone_InitializationFailure')) { - userMsg = 'Microphone initialization failed. 
Please check microphone permissions and availability.'; - } else { - userMsg = `Recognition error: ${e.errorDetails}`; - } - process.send({ type: 'error', error: userMsg }); - - // Retry for transient errors - if (retryCount < maxRetries && e.errorDetails && - (e.errorDetails.includes('1006') || e.errorDetails.includes('timeout') || e.errorDetails.includes('network'))) { - retryCount++; - log('info', `Retrying recognition (attempt ${retryCount}/${maxRetries})`); - setTimeout(() => { - if (!isRecording) attemptRecording(); - }, 1000 * retryCount); - return; - } - - // Persistent credential / network errors → notify UI to stop gracefully - if (e.errorDetails && - (e.errorDetails.includes('InvalidServiceCredentials') || e.errorDetails.includes('Forbidden'))) { - process.send({ type: 'fatal-error', error: userMsg }); } } - stopRecording(); - }; - - recognizer.sessionStarted = (_s, e) => { - log('info', 'Recognition session started', { sessionId: e.sessionId }); - process.send({ type: 'session-started', sessionId: e.sessionId }); - }; - - recognizer.sessionStopped = (_s, e) => { - log('info', 'Recognition session ended', { sessionId: e.sessionId }); - process.send({ type: 'session-stopped', sessionId: e.sessionId }); - stopRecording(); - }; - - // ── Start continuous recognition ──────────────────────────────────── - const startTimeout = setTimeout(() => { - log('error', 'Recognition start timeout'); - process.send({ type: 'error', error: 'Speech recognition start timeout. Please try again.' 
}); - stopRecording(); - }, 10000); + } catch (err) { + log('error', 'Groq transcription failed', { error: err.message }); + // We don't stop recording on API error, we just keep looping unless it's fatal + } - recognizer.startContinuousRecognitionAsync( - () => { - clearTimeout(startTimeout); - log('info', 'Continuous speech recognition started successfully'); - }, - (error) => { - clearTimeout(startTimeout); - log('error', 'Failed to start continuous recognition', { error: error.toString(), retryCount }); + // Loop! + if (isRecording) { + setTimeout(() => runRecordingLoop(), 10); + } + }); +} - if (retryCount < maxRetries) { - retryCount++; - log('info', `Retrying recognition start (attempt ${retryCount}/${maxRetries})`); - isRecording = false; - setTimeout(() => { attemptRecording(); }, 2000 * retryCount); - } else { - process.send({ type: 'error', error: `Recognition startup failed after ${maxRetries} attempts: ${error}` }); - isRecording = false; - } - } - ); - } catch (error) { - log('error', 'Failed to start recording session', { error: error.message, stack: error.stack }); - process.send({ type: 'error', error: `Recording startup failed: ${error.message}` }); - isRecording = false; +function startRecording() { + if (!available) { + process.send({ type: 'error', error: 'Groq API not initialized' }); + return; } + if (isRecording) { + log('warn', 'Recording already in progress'); + return; + } + isRecording = true; + sessionStartTime = Date.now(); + retryCount = 0; + process.send({ type: 'recording-started' }); + process.send({ type: 'session-started', sessionId: 'groq-' + Date.now() }); + + cleanup(); + runRecordingLoop(); } -// ── Stop recording ────────────────────────────────────────────────────── function stopRecording() { if (!isRecording) return; - isRecording = false; + cleanup(); + const dur = sessionStartTime ? 
Date.now() - sessionStartTime : 0; log('info', 'Stopping speech recognition', { sessionDuration: `${dur}ms` }); - - if (recognizer) { - try { - recognizer.stopContinuousRecognitionAsync( - () => { - log('info', 'Speech recognition stopped successfully'); - process.send({ type: 'recording-stopped' }); - cleanup(); - }, - (error) => { - log('error', 'Error stopping recognition', { error: error.toString() }); - process.send({ type: 'recording-stopped' }); - cleanup(); - } - ); - } catch (error) { - log('error', 'Error stopping recognizer', { error: error.message }); - process.send({ type: 'recording-stopped' }); - cleanup(); - } - } else { - process.send({ type: 'recording-stopped' }); - cleanup(); - } + + process.send({ type: 'recording-stopped' }); + process.send({ type: 'session-stopped', sessionId: 'groq-' + Date.now() }); } -// ── Status ────────────────────────────────────────────────────────────── function getStatus() { return { isRecording, - isInitialized: !!speechConfig, + isInitialized: available, available, sessionDuration: sessionStartTime ? 
Date.now() - sessionStartTime : 0, retryCount }; } -// ── Test connection ───────────────────────────────────────────────────── function testConnection() { - if (!speechConfig) { - process.send({ type: 'test-result', success: false, message: 'Speech service not initialized' }); + if (!available) { + process.send({ type: 'test-result', success: false, message: 'Groq not initialized' }); return; } - try { - // Simple validation — just creating a recognizer tests credential format - const testPush = sdk.AudioInputStream.createPushStream(); - const testAudio = sdk.AudioConfig.fromStreamInput(testPush); - const testRec = new sdk.SpeechRecognizer(speechConfig, testAudio); - testRec.close(); - try { testAudio.close(); } catch (_) {} - try { testPush.close(); } catch (_) {} - process.send({ type: 'test-result', success: true, message: 'Connection test successful' }); - } catch (error) { - process.send({ type: 'test-result', success: false, message: error.message }); - } + process.send({ type: 'test-result', success: true, message: 'Connection test successful' }); } -// ── IPC message handler ───────────────────────────────────────────────── process.on('message', (msg) => { try { switch (msg.type) { - case 'init': - initialize(msg.config); - break; - case 'start': - startRecording(); - break; - case 'stop': - stopRecording(); - break; - case 'test': - testConnection(); - break; - case 'status': - process.send({ type: 'status', status: getStatus() }); - break; + case 'init': initialize(msg.config); break; + case 'start': startRecording(); break; + case 'stop': stopRecording(); break; + case 'test': testConnection(); break; + case 'status': process.send({ type: 'status', status: getStatus() }); break; case 'shutdown': stopRecording(); - cleanup(); log('info', 'Worker shutting down'); setTimeout(() => process.exit(0), 500); break; - default: - log('warn', `Unknown message type: ${msg.type}`); + default: log('warn', `Unknown message type: ${msg.type}`); } } catch (error) { - 
log('error', `Error handling message ${msg.type}`, { error: error.message, stack: error.stack }); + log('error', `Error handling message ${msg.type}`, { error: error.message }); process.send({ type: 'error', error: `Worker error: ${error.message}` }); } }); -// ── Graceful exit ─────────────────────────────────────────────────────── process.on('SIGTERM', () => { stopRecording(); - cleanup(); process.exit(0); }); - process.on('uncaughtException', (error) => { - log('error', 'Uncaught exception in speech worker', { error: error.message, stack: error.stack }); - process.send({ type: 'error', error: `Worker crash: ${error.message}` }); - // Don't exit — let main process decide + log('error', 'Uncaught exception in speech worker', { error: error.message }); }); - process.on('unhandledRejection', (reason) => { - const msg = reason instanceof Error ? reason.message : String(reason); - log('error', 'Unhandled rejection in speech worker', { error: msg }); + log('error', 'Unhandled rejection in speech worker', { error: String(reason) }); }); - -log('info', 'Speech worker process started', { pid: process.pid }); +log('info', 'Speech worker process started (Groq Whisper)', { pid: process.pid }); diff --git a/src/services/llm.service.js b/src/services/llm.service.js index 5a929126..3aa2b344 100644 --- a/src/services/llm.service.js +++ b/src/services/llm.service.js @@ -44,9 +44,10 @@ class LLMService { getGenerationConfig(overrides = {}) { const defaults = config.get('llm.groq.generation') || {}; const fallback = { - temperature: 0.7, - max_tokens: 4096, - top_p: 0.95 + temperature: 0.4, + max_tokens: 200, + top_p: 0.95, + stop: ["\n\n", "\n-", "\n*", "\n1."] }; const merged = { ...fallback, ...defaults, ...overrides }; @@ -400,7 +401,7 @@ You MUST keep your answers extremely concise. Respond with exactly 1 to 3 short prompt += ` ## Final Response Rules: -1. Always be conversational and direct. +1. Always be conversational, casual, and direct. 
You MUST use natural human filler words (e.g., 'so', 'just', 'like', 'you know', 'actually', 'well') so it sounds like an unscripted, off-the-cuff verbal response. Do NOT sound like you are reading a textbook. 2. NEVER provide long, detailed responses. Keep it to 1-3 short sentences. 3. If the user asks a coding question, provide a very concise explanation or a brief snippet, but do not write an essay. 4. Remember: DO NOT answer statements made by the interviewee (me). Only answer the interviewer's questions.`; diff --git a/src/services/speech.service.js b/src/services/speech.service.js index d285ad04..5b6636e8 100644 --- a/src/services/speech.service.js +++ b/src/services/speech.service.js @@ -85,16 +85,13 @@ class SpeechService extends EventEmitter { } // Send init message with credentials and config - const subscriptionKey = process.env.AZURE_SPEECH_KEY; - const region = process.env.AZURE_SPEECH_REGION; - const azureConfig = config.get('speech.azure') || {}; + const groqKeyRaw = process.env.GROQ_API_KEY || ''; + const groqKey = groqKeyRaw.split(',')[0].trim(); this.worker.send({ type: 'init', config: { - subscriptionKey, - region, - azure: azureConfig + groqKey } }); From 1411b18b22a14592046f725dc31089401a0fa4d9 Mon Sep 17 00:00:00 2001 From: Matthias Date: Wed, 3 Jun 2026 15:26:50 +0100 Subject: [PATCH 4/9] Fix UI auto-scroll bug that scrolled to top when mic listened --- main.js | 10 +++++++--- speech-worker.js | 27 ++++++++++++++++++++------- src/services/speech.service.js | 4 ++-- src/ui/chat-window.js | 4 ++-- temp_audio.wav | 0 5 files changed, 31 insertions(+), 14 deletions(-) create mode 100644 temp_audio.wav diff --git a/main.js b/main.js index cc67c15d..7a5867a0 100644 --- a/main.js +++ b/main.js @@ -100,6 +100,12 @@ class ApplicationController { await windowManager.initializeWindows(); this.setupGlobalShortcuts(); + + // Auto-start speech recognition and show chat window + speechService.startRecording(); + if (typeof windowManager.showChatWindow === 
'function') { + windowManager.showChatWindow(); + } // Initialize default stealth mode with terminal icon this.updateAppIcon("terminal"); @@ -680,9 +686,7 @@ class ApplicationController { if (currentStatus.isRecording) { try { speechService.stopRecording(); - if (typeof windowManager.hideChatWindow === 'function') { - windowManager.hideChatWindow(); - } + // We intentionally do not hide the chat window here so the user can still read it. logger.info("Speech recognition stopped via global shortcut"); } catch (error) { logger.error("Error stopping speech recognition:", error); diff --git a/speech-worker.js b/speech-worker.js index d6c6e2c9..a103aa87 100644 --- a/speech-worker.js +++ b/speech-worker.js @@ -5,7 +5,8 @@ const path = require('path'); const { spawn } = require('child_process'); const Groq = require('groq-sdk'); -let groq = null; +let groqClients = []; +let currentClientIndex = 0; let isRecording = false; let sessionStartTime = null; let recordingProcess = null; @@ -22,14 +23,19 @@ function log(level, message, data) { function initialize(config) { try { - if (!config.groqKey) { + if (!config.groqKeys || config.groqKeys.length === 0) { available = false; process.send({ type: 'init-result', available: false, reason: 'Missing GROQ_API_KEY' }); return; } - groq = new Groq({ apiKey: config.groqKey }); + + groqClients = config.groqKeys.map(key => new Groq({ apiKey: key })); + + // Auto-select Key 2 (index 1) for voice if available, else fallback to index 0 + currentClientIndex = groqClients.length > 1 ? 
1 : 0; + available = true; - log('info', 'Groq SDK initialized in worker'); + log('info', 'Groq SDK initialized in worker', { keyCount: groqClients.length, startingIndex: currentClientIndex }); process.send({ type: 'init-result', available: true }); } catch (error) { available = false; @@ -54,8 +60,8 @@ async function runRecordingLoop() { let args = []; // sox format arguments: raw PCM, 16kHz, 16-bit, mono - // we wait for 0.1s of sound > 1%, then stop after 0.8s of silence < 1% - const formatArgs = ['-b', '16', '-e', 'signed', '-c', '1', '-r', '16000', tempWavPath, 'silence', '1', '0.1', '1%', '1', '0.8', '1%']; + // we wait for 0.1s of sound > 1%, then stop after 0.9s of silence < 1% + const formatArgs = ['-b', '16', '-e', 'signed', '-c', '1', '-r', '16000', tempWavPath, 'silence', '1', '0.1', '1%', '1', '0.9', '1%']; if (isWindows) { args = ['-t', 'waveaudio', 'default', '-q', ...formatArgs]; @@ -91,7 +97,7 @@ async function runRecordingLoop() { process.send({ type: 'interim-transcription', text: 'Transcribing...' }); - const transcription = await groq.audio.transcriptions.create({ + const transcription = await groqClients[currentClientIndex].audio.transcriptions.create({ file: fs.createReadStream(tempWavPath), model: 'whisper-large-v3-turbo', response_format: 'text', @@ -107,6 +113,13 @@ async function runRecordingLoop() { } } catch (err) { log('error', 'Groq transcription failed', { error: err.message }); + + // If we hit a rate limit, rotate to the next key automatically + if (err.status === 429 || (err.message && err.message.includes('429'))) { + currentClientIndex = (currentClientIndex + 1) % groqClients.length; + log('warn', `Rate limit hit! 
Rotating to API Key Index ${currentClientIndex}`); + } + // We don't stop recording on API error, we just keep looping unless it's fatal } diff --git a/src/services/speech.service.js b/src/services/speech.service.js index 5b6636e8..61d27bc2 100644 --- a/src/services/speech.service.js +++ b/src/services/speech.service.js @@ -86,12 +86,12 @@ class SpeechService extends EventEmitter { // Send init message with credentials and config const groqKeyRaw = process.env.GROQ_API_KEY || ''; - const groqKey = groqKeyRaw.split(',')[0].trim(); + const groqKeys = groqKeyRaw.split(',').map(k => k.trim()).filter(k => k); this.worker.send({ type: 'init', config: { - groqKey + groqKeys } }); diff --git a/src/ui/chat-window.js b/src/ui/chat-window.js index 5836ce09..60ee203f 100644 --- a/src/ui/chat-window.js +++ b/src/ui/chat-window.js @@ -509,9 +509,9 @@ class ChatWindowUI { this.updateListeningDuration(); }, 100); - // Auto-scroll to show the listening animation + // Auto-scroll to bottom to show the listening animation if (this.elements.chatMessages) { - this.elements.chatMessages.scrollTop = 0; + this.elements.chatMessages.scrollTop = this.elements.chatMessages.scrollHeight; } } diff --git a/temp_audio.wav b/temp_audio.wav new file mode 100644 index 00000000..e69de29b From e18e82d9f6d20e50997236791613ff218793823b Mon Sep 17 00:00:00 2001 From: Matthias Date: Wed, 3 Jun 2026 17:05:06 +0100 Subject: [PATCH 5/9] Implement true round-robin API key distribution to process concurrent LLM requests in parallel --- src/services/llm.service.js | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/services/llm.service.js b/src/services/llm.service.js index 3aa2b344..d9b41027 100644 --- a/src/services/llm.service.js +++ b/src/services/llm.service.js @@ -427,13 +427,20 @@ You MUST keep your answers extremely concise. Respond with exactly 1 to 3 short let lastError = null; + // Round-robin: grab the next key instantly for this specific request. 
+ // This ensures concurrent requests process in parallel using completely different API keys, preventing bottlenecks. + const requestStartingKeyIndex = this.currentClientIndex; + if (this.clients.length > 0) { + this.currentClientIndex = (this.currentClientIndex + 1) % this.clients.length; + } + // Try each model instantly on rate limit — zero delay rotation for (let i = 0; i < modelPool.length; i++) { payload.model = modelPool[i]; // Try each API key for the current model for (let j = 0; j < this.clients.length; j++) { - const clientIndex = (this.currentClientIndex + j) % this.clients.length; + const clientIndex = (requestStartingKeyIndex + j) % this.clients.length; const currentClient = this.clients[clientIndex]; try { @@ -442,9 +449,6 @@ You MUST keep your answers extremely concise. Respond with exactly 1 to 3 short if (!response.choices || response.choices.length === 0) { throw new Error('Empty response from Groq API'); } - - // Advance the starting index for the next global request to distribute load, or stay. We'll stay to maximize usage till rate limit. 
- this.currentClientIndex = clientIndex; return response.choices[0].message.content; } catch (error) { From 0c769c1a5f20279ccf0cfc36a547fcee8f10c0e4 Mon Sep 17 00:00:00 2001 From: Matthias Date: Thu, 4 Jun 2026 17:00:51 +0100 Subject: [PATCH 6/9] fix: improve AI interview response quality and UI behavior - Rewrote system prompt to enforce concise, spoken-style answers (2-3 short paragraphs max) - Banned code blocks, bullet points, headers, and essay-style formatting from responses - Increased max_tokens from 150 to 300 for well-calibrated response length - Removed restrictive stop sequences that were cutting off answers prematurely - Removed code snippet rendering from chat UI (text-only display) - Fixed chat scroll behavior to jump to top of AI response instead of bottom - Reduced message spacing from 16px to 6px for compact layout - Fixed speech-worker silence threshold from 0.9s to 0.99s to prevent interruptions --- chat.html | 25 +++++++++++++++++------- speech-worker.js | 36 +++++++++++++++++----------------- src/core/config.js | 2 +- src/services/llm.service.js | 39 +++++++++++-------------------------- src/ui/chat-window.js | 35 +++++++++++++++++++++++++++------ 5 files changed, 77 insertions(+), 60 deletions(-) diff --git a/chat.html b/chat.html index d986477e..b5794328 100644 --- a/chat.html +++ b/chat.html @@ -110,7 +110,7 @@ /* Removed max-height restriction */ } .message { - margin-bottom: 16px; + margin-bottom: 6px; padding: 12px 16px; background: rgba(255, 255, 255, 0.08); border-radius: 8px; @@ -867,12 +867,16 @@ messageDiv.appendChild(textDiv); chatMessages.appendChild(messageDiv); - chatMessages.scrollTop = chatMessages.scrollHeight; + // Only auto-scroll for non-assistant messages; assistant scroll is handled by renderAssistantResponse + if (type !== 'assistant') { + chatMessages.scrollTop = chatMessages.scrollHeight; + } if (!skipPersist) { chatHistory.push({ kind: 'message', type, text }); saveHistory(); } + return messageDiv; } // Markdown 
formatter using markdown.js library @@ -1109,7 +1113,7 @@ messageDiv.appendChild(timeDiv); messageDiv.appendChild(textDiv); chatMessages.appendChild(messageDiv); - chatMessages.scrollTop = chatMessages.scrollHeight; + // Don't auto-scroll; renderAssistantResponse handles scroll to top of first element try { if (window.Prism && Prism.highlightElement) { Prism.highlightElement(codeEl); } } catch (_) {} @@ -1117,6 +1121,7 @@ chatHistory.push({ kind: 'snippet', language: lang, code: code || '' }); saveHistory(); } + return messageDiv; } function renderAssistantResponse(response) { if (!response || typeof response !== 'string') return; @@ -1132,12 +1137,18 @@ const blocks = extractCodeBlocks(response); const textOnly = stripCodeBlocks(response, blocks); - // Add text response first (if any) + + // Add text response only - code snippets are not shown in the chat if (textOnly && textOnly.trim().length) { - addMessage(textOnly, 'assistant'); + firstElement = addMessage(textOnly, 'assistant'); + } + + // After all content is rendered, scroll to the FIRST element of the response + if (firstElement) { + setTimeout(() => { + firstElement.scrollIntoView({ behavior: 'smooth', block: 'start' }); + }, 80); } - // Add each code snippet separately - blocks.forEach(b => addCodeSnippet(b.language, b.code)); } // Basic IPC Event Listeners - simplified diff --git a/speech-worker.js b/speech-worker.js index a103aa87..63692c5a 100644 --- a/speech-worker.js +++ b/speech-worker.js @@ -28,12 +28,12 @@ function initialize(config) { process.send({ type: 'init-result', available: false, reason: 'Missing GROQ_API_KEY' }); return; } - + groqClients = config.groqKeys.map(key => new Groq({ apiKey: key })); - + // Auto-select Key 2 (index 1) for voice if available, else fallback to index 0 currentClientIndex = groqClients.length > 1 ? 
1 : 0; - + available = true; log('info', 'Groq SDK initialized in worker', { keyCount: groqClients.length, startingIndex: currentClientIndex }); process.send({ type: 'init-result', available: true }); @@ -46,23 +46,23 @@ function initialize(config) { function cleanup() { if (recordingProcess) { - try { recordingProcess.kill('SIGKILL'); } catch (_) {} + try { recordingProcess.kill('SIGKILL'); } catch (_) { } recordingProcess = null; } } async function runRecordingLoop() { if (!isRecording) return; - + const tempWavPath = path.join(__dirname, 'temp_audio.wav'); const isWindows = process.platform === 'win32'; const cmd = 'sox'; let args = []; - + // sox format arguments: raw PCM, 16kHz, 16-bit, mono // we wait for 0.1s of sound > 1%, then stop after 0.9s of silence < 1% - const formatArgs = ['-b', '16', '-e', 'signed', '-c', '1', '-r', '16000', tempWavPath, 'silence', '1', '0.1', '1%', '1', '0.9', '1%']; - + const formatArgs = ['-b', '16', '-e', 'signed', '-c', '1', '-r', '16000', tempWavPath, 'silence', '1', '0.1', '1%', '1', '0.99', '1%']; + if (isWindows) { args = ['-t', 'waveaudio', 'default', '-q', ...formatArgs]; } else { @@ -81,7 +81,7 @@ async function runRecordingLoop() { recordingProcess.on('close', async (code) => { recordingProcess = null; - + if (!isRecording) return; if (code !== 0 && code !== null) { @@ -94,7 +94,7 @@ async function runRecordingLoop() { const stats = fs.statSync(tempWavPath); if (stats.size > 2000) { // Check if it's not basically empty (WAV headers are ~44 bytes) log('debug', 'Uploading audio to Groq Whisper...', { size: stats.size }); - + process.send({ type: 'interim-transcription', text: 'Transcribing...' 
}); const transcription = await groqClients[currentClientIndex].audio.transcriptions.create({ @@ -105,21 +105,21 @@ async function runRecordingLoop() { }); if (transcription && transcription.trim().length > 0) { - const dur = Date.now() - sessionStartTime; - log('info', 'Final transcription', { text: transcription.trim(), sessionDuration: `${dur}ms` }); - process.send({ type: 'transcription', text: transcription.trim() }); + const dur = Date.now() - sessionStartTime; + log('info', 'Final transcription', { text: transcription.trim(), sessionDuration: `${dur}ms` }); + process.send({ type: 'transcription', text: transcription.trim() }); } } } } catch (err) { log('error', 'Groq transcription failed', { error: err.message }); - + // If we hit a rate limit, rotate to the next key automatically if (err.status === 429 || (err.message && err.message.includes('429'))) { currentClientIndex = (currentClientIndex + 1) % groqClients.length; log('warn', `Rate limit hit! Rotating to API Key Index ${currentClientIndex}`); } - + // We don't stop recording on API error, we just keep looping unless it's fatal } @@ -144,7 +144,7 @@ function startRecording() { retryCount = 0; process.send({ type: 'recording-started' }); process.send({ type: 'session-started', sessionId: 'groq-' + Date.now() }); - + cleanup(); runRecordingLoop(); } @@ -153,10 +153,10 @@ function stopRecording() { if (!isRecording) return; isRecording = false; cleanup(); - + const dur = sessionStartTime ? 
Date.now() - sessionStartTime : 0; log('info', 'Stopping speech recognition', { sessionDuration: `${dur}ms` }); - + process.send({ type: 'recording-stopped' }); process.send({ type: 'session-stopped', sessionId: 'groq-' + Date.now() }); } diff --git a/src/core/config.js b/src/core/config.js index ed02ab0c..3cd2474d 100644 --- a/src/core/config.js +++ b/src/core/config.js @@ -47,7 +47,7 @@ class ConfigManager { fallbackEnabled: true, generation: { temperature: 0.7, - max_tokens: 150, + max_tokens: 300, top_p: 0.95 } } diff --git a/src/services/llm.service.js b/src/services/llm.service.js index d9b41027..e632df44 100644 --- a/src/services/llm.service.js +++ b/src/services/llm.service.js @@ -45,9 +45,8 @@ class LLMService { const defaults = config.get('llm.groq.generation') || {}; const fallback = { temperature: 0.4, - max_tokens: 200, - top_p: 0.95, - stop: ["\n\n", "\n-", "\n*", "\n1."] + max_tokens: 300, + top_p: 0.95 }; const merged = { ...fallback, ...defaults, ...overrides }; @@ -376,36 +375,20 @@ class LLMService { } getIntelligentTranscriptionPrompt(activeSkill, programmingLanguage, documentContext = null) { - let prompt = `# Intelligent Transcription Response System + let prompt = `You are whispering answers to an interviewee during a live interview. The transcription has both the interviewer and interviewee's voice — ONLY answer the interviewer's questions. Ignore anything the interviewee says. -You are acting as an AI assistant for an interviewee during a live interview. -The transcription you receive will contain BOTH the interviewer's questions AND the interviewee's (my) voice. -CRITICAL INSTRUCTION: You must ONLY respond to the interviewer's questions. If the transcription contains the interviewee (me) answering a question or making a statement, IGNORE IT and do not reply. Do not try to answer my own answers! - -## Brevity & Speed Rule -You MUST keep your answers extremely concise. Respond with exactly 1 to 3 short sentences. 
DO NOT provide long explanations, lists, or pleasantries unless specifically asked. Short answers ensure the response is generated instantly, which is crucial for a live interview.`; +ABSOLUTE RULES: +- Your answer must be 2-3 SHORT paragraphs. Each paragraph is 2 sentences max. No exceptions, even for complex questions. +- NEVER use bullet points, numbered lists, headers, bold, markdown, or any formatting. Only plain flowing sentences. +- NEVER give each sub-topic its own paragraph. Blend everything together tightly. +- NEVER include code. +- Sound like a confident person speaking casually — use filler words like "so", "actually", "you know", "honestly" naturally. Do not sound like a textbook. +- For simple questions (naming, listing, yes/no), answer in 1-2 sentences only.`; if (documentContext) { - prompt += `\n\n## Reference Document Context\n${documentContext}\n\n## FIRST-PERSON RULE\nYou must adopt a first-person persona based on the reference document context provided above. Speak directly from the perspective of the document's subject or author. Use "I", "me", "my". Do not break character.`; - } - - if (programmingLanguage) { - const lang = String(programmingLanguage).toLowerCase(); - const languageMap = { cpp: 'C++', c: 'C', python: 'Python', java: 'Java', javascript: 'JavaScript', js: 'JavaScript' }; - const fenceTagMap = { cpp: 'cpp', c: 'c', python: 'python', java: 'java', javascript: 'javascript', js: 'javascript' }; - const languageTitle = languageMap[lang] || (lang.charAt(0).toUpperCase() + lang.slice(1)); - const fenceTag = fenceTagMap[lang] || lang || 'text'; - prompt += `\n\nCODING CONTEXT: If writing code, respond ONLY in ${languageTitle}. All code blocks must use triple backticks with language tag \`\`\`${fenceTag}\`\`\`.`; + prompt += `\n\nSpeak as the person described below. Use "I", "me", "my". Stay in character.\n\n${documentContext}`; } - prompt += ` - -## Final Response Rules: -1. Always be conversational, casual, and direct. 
You MUST use natural human filler words (e.g., 'so', 'just', 'like', 'you know', 'actually', 'well') so it sounds like an unscripted, off-the-cuff verbal response. Do NOT sound like you are reading a textbook. -2. NEVER provide long, detailed responses. Keep it to 1-3 short sentences. -3. If the user asks a coding question, provide a very concise explanation or a brief snippet, but do not write an essay. -4. Remember: DO NOT answer statements made by the interviewee (me). Only answer the interviewer's questions.`; - return prompt; } diff --git a/src/ui/chat-window.js b/src/ui/chat-window.js index 60ee203f..34256869 100644 --- a/src/ui/chat-window.js +++ b/src/ui/chat-window.js @@ -325,7 +325,7 @@ class ChatWindowUI { } } - addMessage(text, type = 'user') { + addMessage(text, type = 'user', autoScroll = true) { if (!this.elements.chatMessages) { console.error('❌ Chat messages element not found!'); return; @@ -353,8 +353,17 @@ class ChatWindowUI { this.elements.chatMessages.appendChild(messageDiv); - // Auto-scroll to bottom - this.elements.chatMessages.scrollTop = this.elements.chatMessages.scrollHeight; + if (autoScroll) { + // Scroll behavior + if (type === 'assistant') { + // For AI responses, scroll to the top of the new message so user can start reading immediately + messageDiv.scrollIntoView({ behavior: 'smooth', block: 'start' }); + } else { + // For user messages and system messages, scroll to bottom + this.elements.chatMessages.scrollTop = this.elements.chatMessages.scrollHeight; + } + } + return messageDiv; } // Split AI response into plain text and code snippets and append to chat @@ -362,10 +371,23 @@ class ChatWindowUI { if (!response || typeof response !== 'string') return; const blocks = this.extractCodeBlocks(response); const textOnly = this.stripCodeBlocks(response, blocks); + + let firstElement = null; + if (textOnly && textOnly.trim().length) { - this.addMessage(textOnly, 'assistant'); + firstElement = this.addMessage(textOnly, 'assistant', 
false); + } + + blocks.forEach(b => { + const el = this.addCodeSnippet(b.language, b.code); + if (!firstElement) firstElement = el; + }); + + if (firstElement) { + setTimeout(() => { + firstElement.scrollIntoView({ behavior: 'smooth', block: 'start' }); + }, 100); } - blocks.forEach(b => this.addCodeSnippet(b.language, b.code)); } extractCodeBlocks(text) { @@ -402,7 +424,8 @@ class ChatWindowUI { messageDiv.appendChild(timeDiv); messageDiv.appendChild(textDiv); this.elements.chatMessages.appendChild(messageDiv); - this.elements.chatMessages.scrollTop = this.elements.chatMessages.scrollHeight; + // Do not auto-scroll here to prevent jumping past the top of the assistant's response + return messageDiv; } escapeHtmlForSnippet(text) { From b3860454a7e9a4d1c72f3c8d78813d88b31f9601 Mon Sep 17 00:00:00 2001 From: Matthias Date: Thu, 4 Jun 2026 18:43:34 +0100 Subject: [PATCH 7/9] feat: add manual AI response length control to chat UI --- chat.html | 33 +++++++++ main.js | 5 ++ preload.js | 3 +- src/core/config.js | 4 +- src/managers/session.manager.js | 15 ++++ src/services/llm.service.js | 123 +++++++++++++++++++------------- 6 files changed, 131 insertions(+), 52 deletions(-) diff --git a/chat.html b/chat.html index b5794328..68b88663 100644 --- a/chat.html +++ b/chat.html @@ -665,6 +665,27 @@ transform: translateY(0); } } + + .response-mode-select { + background: rgba(255, 255, 255, 0.1); + color: rgba(255, 255, 255, 0.8); + border: 1px solid rgba(255, 255, 255, 0.2); + border-radius: 4px; + padding: 4px 8px; + font-size: 12px; + outline: none; + cursor: pointer; + margin-right: 8px; + font-family: inherit; + transition: all 0.2s ease; + } + .response-mode-select:hover { + background: rgba(255, 255, 255, 0.15); + } + .response-mode-select option { + background: #1e1e1e; + color: #fff; + } @@ -676,6 +697,11 @@
+ @@ -726,6 +752,13 @@ const listeningDuration = document.getElementById('listeningDuration'); const interimOverlay = document.getElementById('interimOverlay'); const clearHistoryBtn = document.getElementById('clearHistoryBtn'); + const responseModeSelect = document.getElementById('responseModeSelect'); + + if (responseModeSelect && window.api) { + responseModeSelect.addEventListener('change', (e) => { + window.api.send('set-response-mode', e.target.value); + }); + } let isRecording = false; let isInteractive = true; diff --git a/main.js b/main.js index 7a5867a0..7a8100ad 100644 --- a/main.js +++ b/main.js @@ -655,6 +655,11 @@ class ApplicationController { windowManager.broadcastToAllWindows("skill-updated", { skill }); }); + // Handle response mode change + ipcMain.on("set-response-mode", (event, mode) => { + sessionManager.setResponseMode(mode); + }); + // Handle quit app (alternative method) ipcMain.on("quit-app", () => { logger.info("Quit app requested via IPC (on method)"); diff --git a/preload.js b/preload.js index d9240b4c..4d088087 100644 --- a/preload.js +++ b/preload.js @@ -115,7 +115,8 @@ contextBridge.exposeInMainWorld('api', { 'toggle-recording', 'toggle-interaction-mode', 'update-skill', - 'window-loaded' + 'window-loaded', + 'set-response-mode' ]; if (validChannels.includes(channel)) { ipcRenderer.send(channel, data); diff --git a/src/core/config.js b/src/core/config.js index 3cd2474d..7861ff0a 100644 --- a/src/core/config.js +++ b/src/core/config.js @@ -18,7 +18,7 @@ class ConfigManager { isDevelopment: this.env === 'development', isProduction: this.env === 'production' }, - + window: { defaultWidth: 400, defaultHeight: 600, @@ -47,7 +47,7 @@ class ConfigManager { fallbackEnabled: true, generation: { temperature: 0.7, - max_tokens: 300, + max_tokens: 350, top_p: 0.95 } } diff --git a/src/managers/session.manager.js b/src/managers/session.manager.js index 198bc044..86439a4d 100644 --- a/src/managers/session.manager.js +++ 
b/src/managers/session.manager.js @@ -9,6 +9,7 @@ class SessionManager { this.maxSize = config.get('session.maxMemorySize'); this.compressionThreshold = config.get('session.compressionThreshold'); this.currentSkill = 'dsa'; // Default skill is DSA + this.responseMode = 'complex'; // Default to complex this.isInitialized = false; this.initializeWithSkillPrompts(); @@ -79,6 +80,20 @@ class SessionManager { }); } + /** + * Set the AI response mode length + */ + setResponseMode(mode) { + if (['simple', 'medium', 'complex'].includes(mode)) { + this.responseMode = mode; + logger.info(`Response mode changed to ${mode}`); + } + } + + getResponseMode() { + return this.responseMode || 'complex'; + } + /** * Add a conversation event with proper role classification */ diff --git a/src/services/llm.service.js b/src/services/llm.service.js index e632df44..cbe279bb 100644 --- a/src/services/llm.service.js +++ b/src/services/llm.service.js @@ -10,16 +10,16 @@ class LLMService { this.isInitialized = false; this.requestCount = 0; this.errorCount = 0; - + this.initializeClient(); } initializeClient() { const apiKeyString = config.getApiKey('GROQ') || ''; const apiKeys = apiKeyString.split(',').map(k => k.trim()).filter(k => k && k !== 'your-api-key-here' && k !== 'your_groq_api_key_here'); - + if (apiKeys.length === 0) { - logger.warn('Groq API key not configured', { + logger.warn('Groq API key not configured', { keyExists: false }); return; @@ -29,14 +29,14 @@ class LLMService { this.clients = apiKeys.map(apiKey => new Groq({ apiKey })); this.currentClientIndex = 0; this.isInitialized = true; - + logger.info('Groq AI clients initialized successfully', { keyCount: apiKeys.length, model: config.get('llm.groq.model') }); } catch (error) { - logger.error('Failed to initialize Groq clients', { - error: error.message + logger.error('Failed to initialize Groq clients', { + error: error.message }); } } @@ -45,7 +45,7 @@ class LLMService { const defaults = config.get('llm.groq.generation') 
|| {}; const fallback = { temperature: 0.4, - max_tokens: 300, + max_tokens: 350, top_p: 0.95 }; @@ -73,7 +73,7 @@ class LLMService { try { const { promptLoader } = require('../../prompt-loader'); const skillPrompt = promptLoader.getSkillPrompt(activeSkill, programmingLanguage) || ''; - + const base64Image = imageBuffer.toString('base64'); const imageUrl = `data:${mimeType};base64,${base64Image}`; @@ -144,7 +144,7 @@ class LLMService { const startTime = Date.now(); this.requestCount++; - + try { logger.info('Processing text with LLM', { activeSkill, @@ -155,7 +155,7 @@ class LLMService { const messages = this.buildGroqRequest(text, activeSkill, sessionMemory, programmingLanguage); const responseText = await this.executeRequest(messages); - + const finalResponse = programmingLanguage ? this.enforceProgrammingLanguage(responseText, programmingLanguage) : responseText; @@ -188,7 +188,7 @@ class LLMService { if (config.get('llm.groq.fallbackEnabled')) { return this.generateFallbackResponse(text, activeSkill); } - + throw error; } } @@ -200,7 +200,7 @@ class LLMService { const startTime = Date.now(); this.requestCount++; - + try { logger.info('Processing transcription with intelligent response', { activeSkill, @@ -210,7 +210,7 @@ class LLMService { const messages = this.buildIntelligentTranscriptionRequest(text, activeSkill, sessionMemory, programmingLanguage); const responseText = await this.executeRequest(messages); - + const finalResponse = programmingLanguage ? 
this.enforceProgrammingLanguage(responseText, programmingLanguage) : responseText; @@ -237,7 +237,7 @@ class LLMService { if (config.get('llm.groq.fallbackEnabled')) { return this.generateIntelligentFallbackResponse(text, activeSkill); } - + throw error; } } @@ -264,7 +264,7 @@ class LLMService { buildGroqRequest(text, activeSkill, sessionMemory, programmingLanguage) { const sessionManager = require('../managers/session.manager'); - + if (sessionManager && typeof sessionManager.getConversationHistory === 'function') { const conversationHistory = sessionManager.getConversationHistory(4); const skillContext = sessionManager.getSkillContext(activeSkill, programmingLanguage); @@ -272,8 +272,8 @@ class LLMService { } const requestComponents = promptLoader.getRequestComponents( - activeSkill, - text, + activeSkill, + text, sessionMemory, programmingLanguage ); @@ -326,7 +326,7 @@ class LLMService { } const sessionManager = require('../managers/session.manager'); - + if (sessionManager && typeof sessionManager.getConversationHistory === 'function') { const conversationHistory = sessionManager.getConversationHistory(4); const skillContext = sessionManager.getSkillContext(activeSkill, programmingLanguage); @@ -336,7 +336,7 @@ class LLMService { const messages = []; const documentContext = sessionManager ? 
sessionManager.getDocumentContext() : null; const intelligentPrompt = this.getIntelligentTranscriptionPrompt(activeSkill, programmingLanguage, documentContext); - + if (intelligentPrompt) { messages.push({ role: 'system', content: intelligentPrompt }); } @@ -350,7 +350,7 @@ class LLMService { const sessionManager = require('../managers/session.manager'); const documentContext = sessionManager.getDocumentContext(); const intelligentPrompt = this.getIntelligentTranscriptionPrompt(activeSkill, programmingLanguage, documentContext); - + if (intelligentPrompt) { messages.push({ role: 'system', content: intelligentPrompt }); } @@ -375,15 +375,33 @@ class LLMService { } getIntelligentTranscriptionPrompt(activeSkill, programmingLanguage, documentContext = null) { + const sessionManager = require('../managers/session.manager'); + const mode = sessionManager.getResponseMode(); let prompt = `You are whispering answers to an interviewee during a live interview. The transcription has both the interviewer and interviewee's voice — ONLY answer the interviewer's questions. Ignore anything the interviewee says. -ABSOLUTE RULES: +ABSOLUTE RULES:`; + + if (mode === 'simple') { + prompt += ` +- Answer in 1-2 sentences only. Be extremely concise. +- NEVER use bullet points, numbered lists, headers, bold, markdown, or any formatting. Only plain flowing sentences. +- NEVER include code. +- Sound like a confident person speaking casually.`; + } else if (mode === 'medium') { + prompt += ` +- Your answer must be 1-2 short paragraphs. +- NEVER use bullet points, numbered lists, headers, bold, markdown, or any formatting. Only plain flowing sentences. +- NEVER include code. +- Sound like a confident person speaking casually.`; + } else { + prompt += ` - Your answer must be 2-3 SHORT paragraphs. Each paragraph is 2 sentences max. No exceptions, even for complex questions. - NEVER use bullet points, numbered lists, headers, bold, markdown, or any formatting. Only plain flowing sentences. 
- NEVER give each sub-topic its own paragraph. Blend everything together tightly. - NEVER include code. - Sound like a confident person speaking casually — use filler words like "so", "actually", "you know", "honestly" naturally. Do not sound like a textbook. - For simple questions (naming, listing, yes/no), answer in 1-2 sentences only.`; + } if (documentContext) { prompt += `\n\nSpeak as the person described below. Use "I", "me", "my". Stay in character.\n\n${documentContext}`; @@ -397,6 +415,12 @@ ABSOLUTE RULES: } async executeRequest(messages, isVision = false) { + const sessionManager = require('../managers/session.manager'); + const mode = sessionManager.getResponseMode(); + let maxTokens = 450; + if (mode === 'simple') maxTokens = 150; + if (mode === 'medium') maxTokens = 250; + // Fast model rotation pool — each model has independent rate limits on Groq free tier const modelPool = isVision ? ['llama-3.2-11b-vision-preview'] @@ -405,7 +429,8 @@ ABSOLUTE RULES: const payload = { messages, model: modelPool[0], - ...this.getGenerationConfig() + ...this.getGenerationConfig(), + max_tokens: maxTokens }; let lastError = null; @@ -420,24 +445,24 @@ ABSOLUTE RULES: // Try each model instantly on rate limit — zero delay rotation for (let i = 0; i < modelPool.length; i++) { payload.model = modelPool[i]; - + // Try each API key for the current model for (let j = 0; j < this.clients.length; j++) { const clientIndex = (requestStartingKeyIndex + j) % this.clients.length; const currentClient = this.clients[clientIndex]; - + try { const response = await currentClient.chat.completions.create(payload); - + if (!response.choices || response.choices.length === 0) { throw new Error('Empty response from Groq API'); } - + return response.choices[0].message.content; } catch (error) { lastError = error; const errorInfo = this.analyzeError(error); - + logger.warn(`Groq model ${payload.model} failed on API key index ${clientIndex}`, { error: error.message, errorType: errorInfo.type, 
@@ -447,23 +472,23 @@ ABSOLUTE RULES: // If rate limited, instantly try next key for the SAME model if (errorInfo.type === 'RATE_LIMIT_ERROR') { - continue; + continue; } - + // If auth error (e.g. invalid key), instantly try next key if (errorInfo.type === 'AUTH_ERROR') { continue; } - + // For other errors (like model decommissioned), break inner loop to move to next model - break; + break; } } - + // If we got here, all keys for this model failed. if (lastError) { const errorInfo = this.analyzeError(lastError); - + // If rate limited across all keys, switch model instantly if (errorInfo.type === 'RATE_LIMIT_ERROR' && i < modelPool.length - 1) { logger.info(`Rate limited across all keys for ${payload.model}, instantly switching to ${modelPool[i + 1]}`); @@ -482,16 +507,16 @@ ABSOLUTE RULES: } } } - + throw new Error(`All Groq models and keys exhausted. Last error: ${lastError ? lastError.message : 'Unknown'}`); } async performPreflightCheck() { try { - await this.testNetworkConnection({ - host: 'api.groq.com', - port: 443, - name: 'Groq API Endpoint' + await this.testNetworkConnection({ + host: 'api.groq.com', + port: 443, + name: 'Groq API Endpoint' }); } catch (error) { logger.warn('Preflight check failed', { error: error.message }); @@ -500,19 +525,19 @@ ABSOLUTE RULES: analyzeError(error) { const errorMessage = error.message.toLowerCase(); - + if (errorMessage.includes('fetch failed') || errorMessage.includes('network error') || errorMessage.includes('timeout')) { return { type: 'NETWORK_ERROR', isNetworkError: true }; } - + if (errorMessage.includes('unauthorized') || errorMessage.includes('invalid api key')) { return { type: 'AUTH_ERROR', isNetworkError: false }; } - + if (errorMessage.includes('rate limit') || errorMessage.includes('too many requests')) { return { type: 'RATE_LIMIT_ERROR', isNetworkError: false }; } - + return { type: 'UNKNOWN_ERROR', isNetworkError: false }; } @@ -542,7 +567,7 @@ ABSOLUTE RULES: return new Promise((resolve, reject) => { 
const net = require('net'); const socket = new net.Socket(); - + const timeout = setTimeout(() => { socket.destroy(); reject(new Error(`Connection timeout to ${host}:${port}`)); @@ -572,7 +597,7 @@ ABSOLUTE RULES: }; const response = fallbackResponses[activeSkill] || fallbackResponses.default; - + return { response, metadata: { @@ -612,15 +637,15 @@ ABSOLUTE RULES: max_tokens: 10 }); const latency = Date.now() - startTime; - - return { - success: true, + + return { + success: true, response: response.choices[0].message.content, latency }; } catch (error) { - return { - success: false, + return { + success: false, error: error.message }; } From c3facb22787d85d22f5b04af7133d1ffc4470c2e Mon Sep 17 00:00:00 2001 From: Matthias Date: Thu, 4 Jun 2026 22:51:09 +0100 Subject: [PATCH 8/9] feat: custom dropdowns, screenshot routing to chat, shortcuts, cursor fix, Llama 4 vision model --- chat.html | 299 +++++++++++++++++++++++++++++--- llm-response.html | 8 +- main.js | 71 +++++++- preload.js | 6 +- src/core/config.js | 2 +- src/services/capture.service.js | 4 +- src/services/llm.service.js | 29 +++- src/ui/chat-window.js | 6 + 8 files changed, 378 insertions(+), 47 deletions(-) diff --git a/chat.html b/chat.html index 68b88663..afdef1b6 100644 --- a/chat.html +++ b/chat.html @@ -45,7 +45,7 @@ -webkit-app-region: drag; background: rgba(0, 0, 0, 0.2); backdrop-filter: blur(10px); - cursor: move; + cursor: default; flex-shrink: 0; } .header-title { @@ -70,7 +70,7 @@ color: rgba(255, 255, 255, 0.9); border-radius: 6px; padding: 6px 10px; - cursor: pointer; + cursor: default; transition: all 0.2s ease; } .icon-button:hover { @@ -288,7 +288,7 @@ padding: 4px 8px; font-size: 11px; border-radius: 4px; - cursor: pointer; + cursor: default; -webkit-app-region: no-drag; transition: background 0.2s ease, border-color 0.2s ease, transform 0.12s ease; } @@ -450,7 +450,7 @@ border-radius: 6px; padding: 6px 10px; color: rgba(255, 255, 255, 0.9); - cursor: pointer; + cursor: default; 
transition: all 0.2s; -webkit-app-region: no-drag; } @@ -479,7 +479,7 @@ border-radius: 6px; padding: 6px 10px; color: rgba(255, 255, 255, 0.9); - cursor: pointer; + cursor: default; transition: all 0.2s; -webkit-app-region: no-drag; } @@ -527,15 +527,10 @@ } .non-interactive .input-container { pointer-events: none; - opacity: 0.5; - } - .non-interactive .mic-button { - pointer-events: none; - opacity: 0.5; + opacity: 0; } - .non-interactive .send-button { - pointer-events: none; - opacity: 0.5; + .non-interactive .icon-button { + display: none; } /* Minimalist Listening Animation */ @@ -666,7 +661,14 @@ } } - .response-mode-select { + /* Custom dropdown to avoid native OS select (which leaks through setContentProtection) */ + .custom-dropdown { + position: relative; + display: inline-block; + margin-right: 8px; + -webkit-app-region: no-drag; + } + .custom-dropdown-btn { background: rgba(255, 255, 255, 0.1); color: rgba(255, 255, 255, 0.8); border: 1px solid rgba(255, 255, 255, 0.2); @@ -674,18 +676,139 @@ padding: 4px 8px; font-size: 12px; outline: none; - cursor: pointer; - margin-right: 8px; + cursor: default; font-family: inherit; transition: all 0.2s ease; + display: flex; + align-items: center; + gap: 4px; + user-select: none; } - .response-mode-select:hover { + .custom-dropdown-btn:hover { background: rgba(255, 255, 255, 0.15); } - .response-mode-select option { - background: #1e1e1e; + .custom-dropdown-btn .arrow { + font-size: 8px; + transition: transform 0.2s ease; + } + .custom-dropdown.open .custom-dropdown-btn .arrow { + transform: rotate(180deg); + } + .custom-dropdown-menu { + display: none; + position: absolute; + top: 100%; + left: 0; + margin-top: 2px; + background: rgba(30, 30, 30, 0.95); + backdrop-filter: blur(15px); + border: 1px solid rgba(255, 255, 255, 0.15); + border-radius: 6px; + min-width: 130px; + z-index: 9999; + overflow: hidden; + box-shadow: 0 4px 16px rgba(0,0,0,0.4); + } + .custom-dropdown.open .custom-dropdown-menu { + display: 
block; + animation: dropIn 0.15s ease-out; + } + @keyframes dropIn { + from { opacity: 0; transform: translateY(-4px); } + to { opacity: 1; transform: translateY(0); } + } + .custom-dropdown-item { + padding: 6px 12px; + font-size: 12px; + color: rgba(255,255,255,0.8); + cursor: default; + transition: background 0.15s ease; + } + .custom-dropdown-item:hover { + background: rgba(255,255,255,0.1); + } + .custom-dropdown-item.selected { + background: rgba(33, 150, 243, 0.3); color: #fff; } + .non-interactive .custom-dropdown { + display: none; + } + + /* Shortcut Help Overlay */ + .shortcut-overlay { + display: none; + position: fixed; + top: 0; + left: 0; + width: 100%; + height: 100%; + background: rgba(0, 0, 0, 0.85); + backdrop-filter: blur(20px); + z-index: 10000; + justify-content: center; + align-items: center; + -webkit-app-region: no-drag; + } + .shortcut-overlay.visible { + display: flex; + animation: fadeIn 0.2s ease-out; + } + @keyframes fadeIn { + from { opacity: 0; } + to { opacity: 1; } + } + .shortcut-card { + background: rgba(30, 30, 30, 0.95); + border: 1px solid rgba(255, 255, 255, 0.12); + border-radius: 12px; + padding: 24px 28px; + max-width: 380px; + width: 90%; + box-shadow: 0 8px 32px rgba(0, 0, 0, 0.5); + } + .shortcut-card h2 { + margin: 0 0 16px 0; + font-size: 15px; + font-weight: 600; + color: rgba(255, 255, 255, 0.95); + text-align: center; + } + .shortcut-row { + display: flex; + justify-content: space-between; + align-items: center; + padding: 6px 0; + border-bottom: 1px solid rgba(255, 255, 255, 0.06); + } + .shortcut-row:last-child { + border-bottom: none; + } + .shortcut-label { + font-size: 12px; + color: rgba(255, 255, 255, 0.7); + } + .shortcut-key { + font-size: 11px; + color: rgba(255, 255, 255, 0.9); + background: rgba(255, 255, 255, 0.1); + border: 1px solid rgba(255, 255, 255, 0.2); + border-radius: 4px; + padding: 2px 8px; + font-family: monospace; + } + .shortcut-mode-indicator { + text-align: center; + margin-top: 12px; + 
padding-top: 12px; + border-top: 1px solid rgba(255, 255, 255, 0.1); + font-size: 11px; + color: rgba(255, 255, 255, 0.5); + } + .shortcut-mode-indicator span { + color: #64b5f6; + font-weight: 600; + } @@ -697,11 +820,30 @@
- +
+ +
+
Auto-Detect Lang
+
C++
+
Python
+
Java
+
JavaScript
+
+
+
+ +
+
Simple
+
Medium
+
Complex
+
+
@@ -729,6 +871,25 @@
+ + + +
+
+

⌨ Keyboard Shortcuts

+
Screenshot & AnalyzeCtrl+S
+
Screenshot (Alt)Ctrl+Shift+S
+
Cycle Response ModeCtrl+Shift+X
+
Toggle RecordingAlt+R
+
Toggle Stealth ModeAlt+A
+
Show/Hide WindowCtrl+Shift+V
+
Open ChatCtrl+Shift+C
+
Clear SessionCtrl+Shift+\
+
SettingsCtrl+,
+
This MenuCtrl+Shift+Z
+
Current mode: Complex
+
+
@@ -752,11 +913,93 @@ const listeningDuration = document.getElementById('listeningDuration'); const interimOverlay = document.getElementById('interimOverlay'); const clearHistoryBtn = document.getElementById('clearHistoryBtn'); - const responseModeSelect = document.getElementById('responseModeSelect'); + // Custom dropdown logic (replaces native