diff --git a/.commandcode/taste/taste.md b/.commandcode/taste/taste.md new file mode 100644 index 0000000..f562cac --- /dev/null +++ b/.commandcode/taste/taste.md @@ -0,0 +1,4 @@ +# Taste (Continuously Learned by [CommandCode][cmd]) + +[cmd]: https://commandcode.ai/ + diff --git a/AGENTS.md b/AGENTS.md index b576a0e..fe9f8be 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -9,7 +9,11 @@ MVP implemented. All core modules are built and tested: - Retrieval pipeline (vector + hybrid keyword/vector) - CLI (index, query, clear, status, list, show, dump) - OpenCode plugin (chat.message hook + auto-context injection + background auto-indexing + read-override) -- Test suite (511 tests, 0 failures) +- TUI settings menu (model selection for embedding/description providers) +- Runtime overrides system (`runtime-overrides.json`) for live config changes +- API key auto-resolution from OpenCode provider config +- Manifest schema versioning with corruption detection +- Test suite (589 tests, 1 integration test requiring opencode binary) Design docs: `ReadMe.md` (project docs), `PLANNING.md` (roadmap + brainstorming), `docs/designs/2026-05-28-rag-plugin-mvp-design.md` (architecture design). @@ -318,6 +322,30 @@ OpenCode config. Instead, rely on `.opencode/plugins/*.js` auto-discovery: - Re-export syntax (`export { X as default } from ...`) produces the same result but is harder to inspect with DevTools or stack traces. +### Runtime overrides (`runtime-overrides.json`) +- The TUI settings menu writes to `${storePath}/runtime-overrides.json`. The plugin and `createRagHooks()` periodically reload these overrides (TTL: 5s) via `loadRuntimeOverrides()` + `applyRuntimeOverrides()`. +- Override values take precedence over `opencode-rag.json` config values. Supported overrides: retrieval settings (`topK`, `minScore`, `maxChunks`), description settings (`enabled`, `provider`, `model`, `baseUrl`), and embedding settings (`provider`, `model`, `baseUrl`). 
+- `saveRuntimeOverride()` in `src/core/runtime-overrides.ts` supports `boolean`, `number`, and `string` values. +- The TUI prompt for numeric settings (`maxChunks`) and the boolean toggles both persist to `runtime-overrides.json` AND `opencode-rag.json` for consistency. + +### TUI Settings Menu +- The TUI plugin (`src/tui.ts`) registers a settings panel accessed from the OpenCode sidebar. +- Categories: Retrieval, Embedding, LLM Descriptions. +- Embedding and Description settings include a **model picker** dropdown populated from OpenCode's registered providers (reads `api.state.provider`). Models are grouped by provider name, sorted alphabetically, with a "Custom…" option for manual entry. +- Selecting a model auto-sets the corresponding provider (`ollama`/`openai`) and base URL (derived from the OpenCode provider config). +- The TUI also provides a prompt-based editor for string/number settings and toggle switches for booleans. + +### Manifest schema versioning +- `src/core/manifest.ts` now includes `SCHEMA_VERSION = 1` and a `schemaVersion` field in `FileManifest`. +- `loadManifest()` checks `parsed.schemaVersion === SCHEMA_VERSION`. If the version doesn't match, it returns `status: "corrupt"`, triggering a full index rebuild. +- `createEmptyManifest()` and `saveManifest()` always set `schemaVersion = SCHEMA_VERSION`. +- This prevents data corruption issues when the manifest format changes between versions. + +### API key resolution from OpenCode provider config +- `resolveApiKeyFromProviderConfig()` in `src/plugin.ts` reads OpenCode config files (`.opencode/opencode.json`, `opencode.json`, `~/.config/opencode/opencode.jsonc`) to find an `apiKey` for the `openai` provider. +- If the embedding or description provider is `"openai"` but no `apiKey` is set in `opencode-rag.json`, the plugin auto-resolves it from the OpenCode config. +- Config files may contain JSONC comments — they are stripped before parsing. + ## Adding a New Language Chunker 1. 
Create `src/chunker/.ts` extending `TreeSitterChunker` @@ -360,6 +388,7 @@ both semantic meaning and code-level similarity. - On LLM failure, falls back to embedding raw content and logs a warning - Set `description.enabled: false` in config to disable and embed raw code instead - Config is in `src/core/config.ts` (`DescriptionConfig`), provider in `src/describer/` +- Chunk descriptions now include relative path and line ranges (e.g. `src/foo.ts, lines 10-42`) even when LLM description is disabled, improving context ## OpenCodeRAG Plugin diff --git a/PLANNING.md b/PLANNING.md index 246a92b..c20d40f 100644 --- a/PLANNING.md +++ b/PLANNING.md @@ -10,7 +10,8 @@ - [x] Line-based fallback chunking for unsupported formats - [x] Pluggable chunkers via `Chunker` interface and config-loaded custom chunkers (`loadChunkersFromConfig()`) - [x] Incremental indexing (file-hash-based, manifest-backed, diff-aware) -- [x] File watching and background re-indexing with debounced, serialized passes +- [x] File watching and background re-indexing with debounced, serialized passes, watcher status file +- [x] Enhanced chunk descriptions with relative paths and line numbers in both LLM and non-LLM modes ### Embedding & Storage @@ -21,6 +22,8 @@ - [x] Pluggable storage via `VectorStore` interface - [x] Pluggable embedders via `EmbeddingProvider` interface - [x] Batch embedding (configurable batch size) +- [x] Auto-detection of LanceDB schema (`tableHasDescriptionColumn()`) for seamless upgrades +- [x] Robust `clear()` via `dropDatabase()` ### Retrieval @@ -35,24 +38,29 @@ - [x] `opencode-rag-context` tool for chunk-level retrieval - [x] `chat.message` hook with file suggestions and auto-injection - [x] RAG-backed read override tool β€” shadows OpenCode's built-in read, appends related code chunks and suggests related files when retrieval finds relevant results -- [x] TUI plugin module (OpenTUI + Solid.js sidebar panel) +- [x] TUI plugin module (OpenTUI + Solid.js sidebar panel) with model 
picker dropdowns for embedding/description providers - [x] `PluginModule` export pattern for OpenCode v1.17.0 compatibility -- [x] Background auto-indexing via `createBackgroundIndexer()` +- [x] Background auto-indexing via `createBackgroundIndexer()` with watcher status file +- [x] API key auto-resolution from OpenCode provider config files ### CLI & Distribution -- [x] CLI (`init`, `index`, `query`, `clear`, `status` via commander) +- [x] CLI (`init`, `index`, `query`, `clear`, `status`, `list`, `show`, `dump` via commander) - [x] Full `init` command lifecycle: generates `.opencode/plugins/rag-plugin.js` + `rag-tui.js`, `.gitignore`, `package.json`; runs `npm install`; cleans stale global plugin registrations; `--skip-install` flag - [x] Install scripts (`install.ps1` / `install.sh`) β€” build, pack, install to `~/.opencode/`, register in `opencode.jsonc`, CLI wrapper, full uninstall mode - [x] Release automation script (`scripts/release-patch.js` with `--dry` support) - [x] Multi-entry package exports: plugin, server, library, TUI - [x] Published npm package: `opencode-rag-plugin` +- [x] CLI query results deduplication +- [x] `clear` command uses `store.dropDatabase()` for clean slate ### Config & Quality - [x] JSON config with deep-merged partial overrides +- [x] Runtime overrides system (`runtime-overrides.json`) for live TUI config changes with 5s TTL - [x] Configurable file logging -- [x] Expanded automated test suite (511+ tests, Node built-in runner) +- [x] Manifest schema versioning with corruption detection and automatic rebuild +- [x] Expanded automated test suite (589+ tests, Node built-in runner) ## Short Term @@ -94,7 +102,8 @@ and safely rebuilds if manifest is missing or corrupt. Watch mode (`index --watch`) uses chokidar for debounced incremental passes. Passes are serialized. The plugin uses the same scheduling for background -auto-indexing inside OpenCode. 
+auto-indexing inside OpenCode, now writing `watcher-status.json` to the store +path for observability of background indexing state. ## 2. 🧠 Query Enhancement @@ -218,6 +227,10 @@ Key strengths: - Broad source and document coverage without native grammar build tools - RAG-backed read tool that enriches file reads with related code chunks - Hybrid keyword + vector search with configurable fusion weights +- TUI settings menu with model picker for embedding and description providers +- Runtime overrides system for live config changes without editing JSON files +- API key auto-resolution from OpenCode provider config +- Manifest schema versioning with auto-rebuild on format changes - Install scripts for one-command global setup and uninstall Key next steps: @@ -227,3 +240,4 @@ Key next steps: 3. Context window optimization for better prompt packing 4. Query rewriting and retrieval explainability 5. Persistent session memory across coding sessions +6. Web UI for index inspection and search result browsing diff --git a/ReadMe.md b/ReadMe.md index f5c288e..5b60ff6 100644 --- a/ReadMe.md +++ b/ReadMe.md @@ -110,12 +110,28 @@ Running `opencode-rag init` creates the config file `opencode-rag.json` in your | `openCode.autoInject.minScore` | `0.75` | Minimum relevance score to inject actual code (0–1). | | `retrieval.topK` | `10` | Default number of chunks fetched per query. | | `retrieval.hybridSearch.enabled` | `true` | Enables combined TFΓ—IDF + vector search. 
| +### TUI Settings Menu -### Description-Based Embedding (Optional) +When running inside OpenCode, the plugin provides a settings panel accessible from the OpenCode sidebar: +- **Retrieval**: `topK`, `minScore`, `maxChunks` +- **Embedding**: Model picker dropdown (populated from OpenCode's registered providers) with custom manual entry +- **LLM Descriptions**: Enable/disable toggle, model picker dropdown -When enabled, the indexer uses an LLM to generate natural-language descriptions of code chunks, then combines the description with the raw code for embedding. This captures both semantic meaning (from the description) and code-level similarity (from the code itself), dramatically improving search quality for natural language and code-based queries alike. +Settings are persisted to `${storePath}/runtime-overrides.json` and take precedence over `opencode-rag.json`. The plugin reloads these on a 5-second TTL. -As this needs more processing power, it is recommended to keep this disabled if you don't use a dedicated GPU for inference. +### API Key Auto-Resolution + +If you set `embedding.provider` or `description.provider` to `"openai"` but don't specify an `apiKey` in `opencode-rag.json`, the plugin automatically resolves the key from OpenCode's own provider configuration (`.opencode/opencode.json`, `opencode.json`, or `~/.config/opencode/opencode.jsonc`). + +### Manifest Schema Versioning + +The manifest file now contains a `schemaVersion` field. If the stored manifest has a mismatched version, a full index rebuild is triggered automatically — this prevents silent corruption when the manifest format changes between plugin versions. + +### Description-Based Embedding (Enabled by Default) + +The indexer uses an LLM to generate natural-language descriptions of code chunks, then combines the description with the raw code for embedding. 
This captures both semantic meaning (from the description) and code-level similarity (from the code itself), dramatically improving search quality for natural language and code-based queries alike. + +> As this needs more processing power, it is recommended to disable it (`description.enabled: false`) if you don't have a dedicated GPU for inference or want to reduce latency during indexing. ```json { @@ -125,7 +141,7 @@ As this needs more processing power, it is recommended to keep this disabled if "baseUrl": "http://localhost:11434/api", "model": "qwen2.5:3b", "timeoutMs": 60000, - "systemPrompt": "You are a code analysis assistant. Given a code snippet, write a short (2-3 sentence) description of what the code does, its purpose, and key functionality. Focus on semantic meaning that would help someone searching for this code. Do not include code in your response." + "systemPrompt": "Describe code for semantic search in short simple words, simple grammar, no code, no comments." } } ``` @@ -138,7 +154,7 @@ As this needs more processing power, it is recommended to keep this disabled if | `description.systemPrompt` | *(see above)* | Customizable system prompt for the LLM. | | `description.timeoutMs` | `60000` | Timeout per LLM call. | -The embedded text is formed as `description + "\n\n" + code content`. The description and code are still stored as separate fields in LanceDB. Keyword search continues to use the raw code content. Set `description.enabled` to `false` to disable and embed raw code content instead. If the LLM call fails during indexing, the chunk falls back to embedding raw content with a warning logged. +The embedded text is formed as `description + "\n\n" + code content`. The description and code are still stored as separate fields in LanceDB. Keyword search continues to use the raw code content. Even when LLM descriptions are disabled, chunk descriptions still include the file path and line range (e.g. `src/foo.ts, lines 10-42`). 
If the LLM call fails during indexing, the chunk falls back to embedding raw content with a warning logged.
View Logging Configuration diff --git a/opencode-rag.json b/opencode-rag.json index ff6d217..be459e0 100644 --- a/opencode-rag.json +++ b/opencode-rag.json @@ -1,8 +1,8 @@ { "embedding": { - "provider": "ollama", - "baseUrl": "http://127.0.0.1:11434/api", - "model": "embeddinggemma", + "provider": "nvidia", + "baseUrl": "https://integrate.api.nvidia.com/v1", + "model": "nvidia/llama-nemotron-embed-vl-1b-v2", "timeoutMs": 60000, "documentPrefix": "", "queryPrefix": "" @@ -91,11 +91,12 @@ }, "description": { "enabled": true, - "provider": "ollama", - "baseUrl": "http://127.0.0.1:11434/api", - "model": "qwen2.5:3b", + "provider": "openai", + "baseUrl": "https://api.openai.com/v1", + "model": "north-mini-code-free", "timeoutMs": 60000, - "systemPrompt": "Describe code for semantic search in short simple words, simple grammar, no code, no comments. If user message contains multiple chunks labeled === CHUNK N ===, describe each one separately, starting each with CHUNK N: followed by the description. For a single chunk, output description directly." + "systemPrompt": "Describe code for semantic search in short simple words, simple grammar, no code, no comments. If user message contains multiple chunks labeled === CHUNK N ===, describe each one separately, starting each with CHUNK N: followed by the description. 
For a single chunk, output description directly.", + "apiKey": "public" }, "logging": { "level": "debug", diff --git a/package-lock.json b/package-lock.json index c3ce18e..c6956ca 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "opencode-rag-plugin", - "version": "1.4.2", + "version": "1.5.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "opencode-rag-plugin", - "version": "1.4.2", + "version": "1.5.1", "license": "MIT", "dependencies": { "@e965/xlsx": "^0.20.3", diff --git a/package.json b/package.json index 6c344c6..7f65007 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "opencode-rag-plugin", - "version": "1.4.2", + "version": "1.5.1", "description": "OpenCode plugin for local-first RAG-based semantic code search", "type": "module", "main": "./dist/plugin-entry.js", diff --git a/src/__tests__/core/runtime-overrides.test.ts b/src/__tests__/core/runtime-overrides.test.ts index 0de1c26..a800737 100644 --- a/src/__tests__/core/runtime-overrides.test.ts +++ b/src/__tests__/core/runtime-overrides.test.ts @@ -100,6 +100,35 @@ describe("saveRuntimeOverride", () => { const result = loadRuntimeOverrides(tmpDir); assert.equal(result.description?.enabled, false); }); + + it("handles string values", () => { + saveRuntimeOverride(tmpDir, ["embedding", "model"], "nomic-embed-text"); + const result = loadRuntimeOverrides(tmpDir); + assert.equal(result.embedding?.model, "nomic-embed-text"); + }); + + it("handles string enum values", () => { + saveRuntimeOverride(tmpDir, ["embedding", "provider"], "openai"); + const result = loadRuntimeOverrides(tmpDir); + assert.equal(result.embedding?.provider, "openai"); + }); + + it("overwrites string value with another string", () => { + saveRuntimeOverride(tmpDir, ["embedding", "baseUrl"], "http://localhost:11434/api"); + saveRuntimeOverride(tmpDir, ["embedding", "baseUrl"], "http://custom:8080/api"); + const result = loadRuntimeOverrides(tmpDir); + 
assert.equal(result.embedding?.baseUrl, "http://custom:8080/api"); + }); + + it("mixes string and boolean overrides", () => { + saveRuntimeOverride(tmpDir, ["embedding", "provider"], "openai"); + saveRuntimeOverride(tmpDir, ["embedding", "model"], "text-embedding-3-small"); + saveRuntimeOverride(tmpDir, ["description", "enabled"], false); + const result = loadRuntimeOverrides(tmpDir); + assert.equal(result.embedding?.provider, "openai"); + assert.equal(result.embedding?.model, "text-embedding-3-small"); + assert.equal(result.description?.enabled, false); + }); }); describe("applyRuntimeOverrides", () => { @@ -221,4 +250,65 @@ describe("applyRuntimeOverrides", () => { }); assert.equal(result.description?.enabled, false); }); + + it("applies embedding.provider override", () => { + const result = applyRuntimeOverrides(DEFAULT_CONFIG, { + embedding: { provider: "openai" }, + }); + assert.equal(result.embedding.provider, "openai"); + }); + + it("applies embedding.model override", () => { + const result = applyRuntimeOverrides(DEFAULT_CONFIG, { + embedding: { model: "text-embedding-3-small" }, + }); + assert.equal(result.embedding.model, "text-embedding-3-small"); + }); + + it("applies embedding.baseUrl override", () => { + const result = applyRuntimeOverrides(DEFAULT_CONFIG, { + embedding: { baseUrl: "https://custom.api.com/v1" }, + }); + assert.equal(result.embedding.baseUrl, "https://custom.api.com/v1"); + }); + + it("applies all embedding overrides simultaneously", () => { + const result = applyRuntimeOverrides(DEFAULT_CONFIG, { + embedding: { provider: "openai", model: "text-embedding-3-small", baseUrl: "https://api.openai.com/v1" }, + }); + assert.equal(result.embedding.provider, "openai"); + assert.equal(result.embedding.model, "text-embedding-3-small"); + assert.equal(result.embedding.baseUrl, "https://api.openai.com/v1"); + }); + + it("applies description.provider override", () => { + const result = applyRuntimeOverrides(DEFAULT_CONFIG, { + description: { 
provider: "openai" }, + }); + assert.equal(result.description?.provider, "openai"); + }); + + it("applies description.model override", () => { + const result = applyRuntimeOverrides(DEFAULT_CONFIG, { + description: { model: "gpt-4o-mini" }, + }); + assert.equal(result.description?.model, "gpt-4o-mini"); + }); + + it("applies description.baseUrl override", () => { + const result = applyRuntimeOverrides(DEFAULT_CONFIG, { + description: { baseUrl: "https://custom.api.com/v1" }, + }); + assert.equal(result.description?.baseUrl, "https://custom.api.com/v1"); + }); + + it("applies description.provider and model together with enabled", () => { + const result = applyRuntimeOverrides(DEFAULT_CONFIG, { + description: { provider: "openai", model: "gpt-4o-mini", enabled: true }, + }); + assert.equal(result.description?.provider, "openai"); + assert.equal(result.description?.model, "gpt-4o-mini"); + assert.equal(result.description?.enabled, true); + }); + }); diff --git a/src/__tests__/embedder/factory.test.ts b/src/__tests__/embedder/factory.test.ts index e7e69c3..8664e81 100644 --- a/src/__tests__/embedder/factory.test.ts +++ b/src/__tests__/embedder/factory.test.ts @@ -71,11 +71,11 @@ describe("createEmbedder", () => { }, }); assert.throws(() => createEmbedder(config), { - message: "OpenAI provider requires an apiKey", + message: /openai provider requires an apiKey/, }); }); - it("throws for unknown provider", () => { + it("treats unknown provider as OpenAI-compatible and requires apiKey", () => { const config = makeConfig({ embedding: { provider: "unknown" as "ollama", @@ -85,7 +85,20 @@ describe("createEmbedder", () => { }, }); assert.throws(() => createEmbedder(config), { - message: /Unknown embedding provider/, + message: /requires an apiKey/, }); }); + + it("creates OpenAIProvider for unknown provider with apiKey", () => { + const config = makeConfig({ + embedding: { + provider: "custom" as "ollama", + baseUrl: "https://custom.api/v1", + model: "custom-model", + apiKey: 
"custom-key", + }, + }); + const embedder = createEmbedder(config); + assert.equal(embedder.name, "openai"); + }); }); diff --git a/src/cli.ts b/src/cli.ts index 04a7a47..61828df 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -8,6 +8,7 @@ import { fileURLToPath } from "node:url"; import chokidar from "chokidar"; import pc from "picocolors"; import { loadConfig, DEFAULT_CONFIG, resolveLogConfig, type RagConfig } from "./core/config.js"; +import { resolveApiKey } from "./core/resolve-api-key.js"; import { appendDebugLog } from "./core/fileLogger.js"; import { loadChunkersFromConfig } from "./chunker/loader.js"; @@ -75,10 +76,12 @@ function logCliInfo(logFilePath: string, scope: string, message: string): void { } async function resolveConfig(opt: CliOptions, logFilePath: string): Promise { + const worktree = process.cwd(); if (opt.config) { try { const configPath = path.resolve(opt.config); const cfg = loadConfig(configPath); + resolveApiKey(cfg, worktree); await loadChunkersFromConfig(cfg, path.dirname(configPath)); logCliInfo(logFilePath, "config", `${c.label("Config:")} ${c.file(configPath)}`); return logConfigDetails(logFilePath,cfg); @@ -91,6 +94,7 @@ async function resolveConfig(opt: CliOptions, logFilePath: string): Promise 0 && typeof probe[0][0] === "number") { vectorDimension = (probe[0] as number[]).length; diff --git a/src/core/config.ts b/src/core/config.ts index ac96979..6debee6 100644 --- a/src/core/config.ts +++ b/src/core/config.ts @@ -32,7 +32,7 @@ export interface AutoInjectConfig { export interface DescriptionConfig { enabled: boolean; - provider: "ollama" | "openai"; + provider: string; baseUrl: string; model: string; apiKey?: string; @@ -47,7 +47,7 @@ export interface DescriptionConfig { export interface RagConfig { embedding: { - provider: "ollama" | "openai"; + provider: string; baseUrl: string; apiKey?: string; model: string; diff --git a/src/core/provider-defaults.ts b/src/core/provider-defaults.ts new file mode 100644 index 0000000..10e9822 --- 
/dev/null +++ b/src/core/provider-defaults.ts @@ -0,0 +1,92 @@ +export interface ProviderDefaults { + defaultBaseUrl: string; + apiKeyEnvVar: string; + supportsEmbedding: boolean; + supportsChat: boolean; +} + +export const PROVIDER_DEFAULTS: Record = { + ollama: { + defaultBaseUrl: "http://127.0.0.1:11434", + apiKeyEnvVar: "", + supportsEmbedding: true, + supportsChat: true, + }, + openai: { + defaultBaseUrl: "https://api.openai.com/v1", + apiKeyEnvVar: "OPENAI_API_KEY", + supportsEmbedding: true, + supportsChat: true, + }, + nvidia: { + defaultBaseUrl: "https://integrate.api.nvidia.com/v1", + apiKeyEnvVar: "NVIDIA_API_KEY", + supportsEmbedding: true, + supportsChat: true, + }, + azure: { + defaultBaseUrl: "", + apiKeyEnvVar: "AZURE_OPENAI_KEY", + supportsEmbedding: true, + supportsChat: true, + }, + mistral: { + defaultBaseUrl: "https://api.mistral.ai/v1", + apiKeyEnvVar: "MISTRAL_API_KEY", + supportsEmbedding: true, + supportsChat: true, + }, + together: { + defaultBaseUrl: "https://api.together.xyz/v1", + apiKeyEnvVar: "TOGETHER_API_KEY", + supportsEmbedding: true, + supportsChat: true, + }, + groq: { + defaultBaseUrl: "https://api.groq.com/openai/v1", + apiKeyEnvVar: "GROQ_API_KEY", + supportsEmbedding: false, + supportsChat: true, + }, + deepseek: { + defaultBaseUrl: "https://api.deepseek.com", + apiKeyEnvVar: "DEEPSEEK_API_KEY", + supportsEmbedding: false, + supportsChat: true, + }, + fireworks: { + defaultBaseUrl: "https://api.fireworks.ai/inference/v1", + apiKeyEnvVar: "FIREWORKS_API_KEY", + supportsEmbedding: true, + supportsChat: true, + }, + anthropic: { + defaultBaseUrl: "https://api.anthropic.com/v1", + apiKeyEnvVar: "ANTHROPIC_API_KEY", + supportsEmbedding: false, + supportsChat: true, + }, + google: { + defaultBaseUrl: "https://generativelanguage.googleapis.com/v1", + apiKeyEnvVar: "GOOGLE_API_KEY", + supportsEmbedding: false, + supportsChat: true, + }, + cohere: { + defaultBaseUrl: "https://api.cohere.ai/v1", + apiKeyEnvVar: "COHERE_API_KEY", + 
supportsEmbedding: true, + supportsChat: false, + }, +}; + +export function getProviderDefault(provider: string): ProviderDefaults | undefined { + return PROVIDER_DEFAULTS[provider]; +} + +export function isOpenAiCompatible(provider: string): boolean { + if (provider === "ollama" || provider === "anthropic" || provider === "google" || provider === "cohere") { + return false; + } + return true; +} diff --git a/src/core/resolve-api-key.ts b/src/core/resolve-api-key.ts new file mode 100644 index 0000000..45d6c39 --- /dev/null +++ b/src/core/resolve-api-key.ts @@ -0,0 +1,81 @@ +import { readFileSync, existsSync } from "node:fs"; +import path from "node:path"; +import type { RagConfig } from "./config.js"; +import { getProviderDefault } from "./provider-defaults.js"; + +export function resolveApiKey( + cfg: RagConfig, + worktree?: string +): void { + resolveForSection(cfg.embedding.provider, cfg.embedding, worktree); + if (cfg.description) { + resolveForSection(cfg.description.provider, cfg.description, worktree); + } +} + +function isPlaceholder(value: string): boolean { + return value === "public" || value === "" || value === "PLACEHOLDER"; +} + +function resolveForSection( + provider: string, + section: { apiKey?: string }, + worktree?: string, +): void { + // If a real (non-placeholder) key is already set, keep it + if (section.apiKey && !isPlaceholder(section.apiKey)) return; + + const defaults = getProviderDefault(provider); + if (!defaults || !defaults.apiKeyEnvVar) return; + + const envKey = process.env[defaults.apiKeyEnvVar]; + if (envKey) { + section.apiKey = envKey; + return; + } + + if (worktree) { + const key = readOpenCodeProviderKey(worktree, provider); + if (key) { + section.apiKey = key; + return; + } + } + + // If we had a placeholder but couldn't resolve a real key, keep the placeholder + // so createEmbedder can throw a clear error about the missing key +} + +function stripJsoncComments(text: string): string { + return text.replace(/\/\/.*$/gm, 
"").replace(/\/\*[\s\S]*?\*\//g, ""); +} + +function readOpenCodeProviderKey(worktree: string, providerId: string): string | undefined { + const locations = [ + path.join(worktree, ".opencode", "opencode.json"), + path.join(worktree, "opencode.json"), + ]; + const homeDir = process.env.USERPROFILE || process.env.HOME; + if (homeDir) { + locations.push(path.join(homeDir, ".config", "opencode", "opencode.jsonc")); + } + + for (const loc of locations) { + try { + if (!existsSync(loc)) continue; + const raw = readFileSync(loc, "utf-8"); + const cleaned = stripJsoncComments(raw); + const config = JSON.parse(cleaned) as Record; + const providerSection = config.provider as Record | undefined; + if (!providerSection) continue; + const providerConfig = providerSection[providerId] as Record | undefined; + if (!providerConfig) continue; + const options = providerConfig.options as Record | undefined; + const key = options?.apiKey as string | undefined; + if (key) return key; + } catch { + // skip unreadable or unparseable files + } + } + return undefined; +} diff --git a/src/core/runtime-overrides.ts b/src/core/runtime-overrides.ts index 7f3ea8f..9652252 100644 --- a/src/core/runtime-overrides.ts +++ b/src/core/runtime-overrides.ts @@ -22,8 +22,16 @@ export interface RuntimeOverrides { maxChunks?: number; }; }; + embedding?: { + provider?: string; + model?: string; + baseUrl?: string; + }; description?: { enabled?: boolean; + provider?: string; + model?: string; + baseUrl?: string; }; } @@ -40,7 +48,7 @@ export function loadRuntimeOverrides(storePath: string): RuntimeOverrides { export function saveRuntimeOverride( storePath: string, path: string[], - value: boolean | number + value: boolean | number | string ): void { const overridePath = join(storePath, "runtime-overrides.json"); const overrides = loadRuntimeOverrides(storePath); @@ -96,11 +104,29 @@ export function applyRuntimeOverrides( } } + if (overrides.embedding) { + if (overrides.embedding.provider !== undefined) 
merged.embedding.provider = overrides.embedding.provider; + if (overrides.embedding.model !== undefined) merged.embedding.model = overrides.embedding.model; + if (overrides.embedding.baseUrl !== undefined) merged.embedding.baseUrl = overrides.embedding.baseUrl; + } + if (overrides.description) { if (overrides.description.enabled !== undefined) { if (!merged.description) merged.description = { enabled: true, provider: "ollama", baseUrl: "http://127.0.0.1:11434/api", model: "qwen2.5:3b", systemPrompt: "" }; merged.description.enabled = overrides.description.enabled; } + if (overrides.description.provider !== undefined) { + if (!merged.description) merged.description = { enabled: true, provider: "ollama", baseUrl: "http://127.0.0.1:11434/api", model: "qwen2.5:3b", systemPrompt: "" }; + merged.description.provider = overrides.description.provider; + } + if (overrides.description.model !== undefined) { + if (!merged.description) merged.description = { enabled: true, provider: "ollama", baseUrl: "http://127.0.0.1:11434/api", model: "qwen2.5:3b", systemPrompt: "" }; + merged.description.model = overrides.description.model; + } + if (overrides.description.baseUrl !== undefined) { + if (!merged.description) merged.description = { enabled: true, provider: "ollama", baseUrl: "http://127.0.0.1:11434/api", model: "qwen2.5:3b", systemPrompt: "" }; + merged.description.baseUrl = overrides.description.baseUrl; + } } return merged; diff --git a/src/describer/anthropic.ts b/src/describer/anthropic.ts new file mode 100644 index 0000000..4319a22 --- /dev/null +++ b/src/describer/anthropic.ts @@ -0,0 +1,221 @@ +import type { Chunk, DescriptionProvider } from "../core/interfaces.js"; +import type { DescriptionConfig, ProxyConfig } from "../core/config.js"; +import { postJson } from "../embedder/http.js"; + +interface AnthropicMessage { + role: "user" | "assistant"; + content: string; +} + +interface AnthropicResponse { + content?: Array<{ type?: string; text?: string }>; +} + +const 
RETRYABLE_STATUSES = new Set([408, 429, 500, 502, 503, 504]); + +export class AnthropicDescriptionProvider implements DescriptionProvider { + private readonly config: DescriptionConfig; + + constructor(config: DescriptionConfig) { + this.config = config; + } + + async generateDescription(chunk: Chunk): Promise { + const messages: AnthropicMessage[] = [ + { role: "user", content: buildUserMessage(chunk) }, + ]; + + return this.chatRequest(messages, this.config.timeoutMs ?? 60000); + } + + async generateBatchDescriptions(chunks: Chunk[]): Promise> { + if (chunks.length === 1) { + const desc = await this.generateDescription(chunks[0]!); + return new Map([[chunks[0]!.id, desc]]); + } + + const batchMaxChunks = this.config.batchMaxChunks ?? 25; + const batches: Chunk[][] = []; + for (let i = 0; i < chunks.length; i += batchMaxChunks) { + batches.push(chunks.slice(i, i + batchMaxChunks)); + } + + const result = new Map(); + for (const batch of batches) { + try { + const batchResult = await this.executeBatch(batch); + for (const [id, desc] of batchResult) { + result.set(id, desc); + } + } catch { + // Batch failed β€” individual fallback below handles missing chunks + } + } + + for (let i = 0; i < chunks.length; i++) { + const chunk = chunks[i]!; + if (!result.has(chunk.id)) { + try { + const desc = await this.generateDescription(chunk); + result.set(chunk.id, desc); + } catch { + // skip β€” caller will fall back to content + } + } + } + + return result; + } + + private async executeBatch(chunks: Chunk[]): Promise> { + const messages: AnthropicMessage[] = [ + { role: "user", content: buildBatchUserMessage(chunks) }, + ]; + + const timeoutMs = this.config.batchTimeoutMs ?? 
120000; + const responseText = await this.chatRequest(messages, timeoutMs); + + return parseBatchResponse(responseText, chunks); + } + + private async chatRequest( + messages: AnthropicMessage[], + timeoutMs: number, + ): Promise { + const baseUrl = this.config.baseUrl.replace(/\/+$/, ""); + const apiKey = this.config.apiKey ?? ""; + const systemPrompt = this.config.systemPrompt; + + const body: Record = { + model: this.config.model, + max_tokens: 4096, + messages: [{ role: "user", content: systemPrompt + "\n\n" + messages.map((m) => `${m.role}: ${m.content}`).join("\n\n") + "\n\nassistant:" }], + }; + + const headers: Record = { + "x-api-key": apiKey, + "anthropic-version": "2023-06-01", + "Content-Type": "application/json", + }; + + const retryMax = this.config.retryMax ?? 3; + const retryBaseDelayMs = this.config.retryBaseDelayMs ?? 1000; + + let lastError: Error | undefined; + for (let attempt = 0; attempt <= retryMax; attempt++) { + const response = await postJson( + `${baseUrl}/messages`, + body, + headers, + timeoutMs, + this.config.proxy, + ); + + if (response.ok) { + const json = (await response.json()) as AnthropicResponse; + const text = json.content?.[0]?.text; + if (text && text.trim().length > 0) { + return text.trim(); + } + throw new Error(`Anthropic returned empty response: ${JSON.stringify(json)}`); + } + + const text = await response.text(); + const error = new Error( + `Anthropic LLM request failed (${response.status}): ${text}`, + ); + + if (!RETRYABLE_STATUSES.has(response.status) || attempt === retryMax) { + throw error; + } + + lastError = error; + const delayMs = retryBaseDelayMs * Math.pow(2, attempt); + await sleep(delayMs); + } + + throw lastError ?? 
new Error("Anthropic LLM request failed: unknown error"); + } +} + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +function buildUserMessage(chunk: Chunk): string { + const parts: string[] = []; + + if (chunk.metadata.filePath) { + parts.push(`File: ${chunk.metadata.filePath}`); + } + if (chunk.metadata.language) { + parts.push(`Language: ${chunk.metadata.language}`); + } + parts.push(`Lines: ${chunk.metadata.startLine}-${chunk.metadata.endLine}`); + parts.push(""); + parts.push("```" + (chunk.metadata.language || "")); + parts.push(chunk.content); + parts.push("```"); + + return parts.join("\n"); +} + +function buildBatchUserMessage(chunks: Chunk[]): string { + const first = chunks[0]!; + const parts: string[] = []; + + if (first.metadata.filePath) { + parts.push(`File: ${first.metadata.filePath}`); + } + if (first.metadata.language) { + parts.push(`Language: ${first.metadata.language}`); + } + parts.push(`Chunks: ${chunks.length}`); + parts.push(""); + + for (let i = 0; i < chunks.length; i++) { + const chunk = chunks[i]!; + const lang = chunk.metadata.language || ""; + parts.push(`=== CHUNK ${i} (lines ${chunk.metadata.startLine}-${chunk.metadata.endLine}) ===`); + parts.push("```" + lang); + parts.push(chunk.content); + parts.push("```"); + parts.push(""); + } + + return parts.join("\n"); +} + +function parseBatchResponse(text: string, chunks: Chunk[]): Map { + const result = new Map(); + const chunkPattern = /^CHUNK\s+(\d+)\s*[):-]?\s*/i; + + const lines = text.split("\n"); + let currentIndex: number | null = null; + let currentDesc: string[] = []; + + for (const line of lines) { + const match = chunkPattern.exec(line.trim()); + if (match) { + if (currentIndex !== null && currentDesc.length > 0) { + const desc = currentDesc.join(" ").trim(); + if (desc.length > 0 && currentIndex >= 0 && currentIndex < chunks.length) { + result.set(chunks[currentIndex]!.id, desc); + } + } + currentIndex = 
parseInt(match[1]!, 10); + currentDesc = [line.slice(match[0]!.length).trim()]; + } else if (currentIndex !== null) { + currentDesc.push(line.trim()); + } + } + + if (currentIndex !== null && currentDesc.length > 0) { + const desc = currentDesc.join(" ").trim(); + if (desc.length > 0 && currentIndex >= 0 && currentIndex < chunks.length) { + result.set(chunks[currentIndex]!.id, desc); + } + } + + return result; +} diff --git a/src/describer/factory.ts b/src/describer/factory.ts index c617400..c751e6b 100644 --- a/src/describer/factory.ts +++ b/src/describer/factory.ts @@ -1,9 +1,25 @@ import type { DescriptionProvider } from "../core/interfaces.js"; import type { DescriptionConfig } from "../core/config.js"; import { LLMDescriptionProvider } from "./describer.js"; +import { AnthropicDescriptionProvider } from "./anthropic.js"; +import { GeminiDescriptionProvider } from "./gemini.js"; export function createDescriptionProvider( config: DescriptionConfig ): DescriptionProvider { + if (config.provider === "anthropic") { + if (!config.apiKey) { + throw new Error("Anthropic provider requires an apiKey"); + } + return new AnthropicDescriptionProvider(config); + } + + if (config.provider === "google") { + if (!config.apiKey) { + throw new Error("Google Gemini provider requires an apiKey"); + } + return new GeminiDescriptionProvider(config); + } + return new LLMDescriptionProvider(config); } diff --git a/src/describer/gemini.ts b/src/describer/gemini.ts new file mode 100644 index 0000000..40456ab --- /dev/null +++ b/src/describer/gemini.ts @@ -0,0 +1,231 @@ +import type { Chunk, DescriptionProvider } from "../core/interfaces.js"; +import type { DescriptionConfig } from "../core/config.js"; +import { postJson } from "../embedder/http.js"; + +interface GeminiContent { + role?: string; + parts: Array<{ text: string }>; +} + +interface GeminiResponse { + candidates?: Array<{ + content?: { + parts?: Array<{ text?: string }>; + }; + }>; +} + +const RETRYABLE_STATUSES = new 
Set([408, 429, 500, 502, 503, 504]); + +export class GeminiDescriptionProvider implements DescriptionProvider { + private readonly config: DescriptionConfig; + + constructor(config: DescriptionConfig) { + this.config = config; + } + + async generateDescription(chunk: Chunk): Promise { + const contents: GeminiContent[] = [ + { + role: "user", + parts: [{ text: buildUserMessage(chunk) }], + }, + ]; + + return this.chatRequest(contents, this.config.timeoutMs ?? 60000); + } + + async generateBatchDescriptions(chunks: Chunk[]): Promise> { + if (chunks.length === 1) { + const desc = await this.generateDescription(chunks[0]!); + return new Map([[chunks[0]!.id, desc]]); + } + + const batchMaxChunks = this.config.batchMaxChunks ?? 25; + const batches: Chunk[][] = []; + for (let i = 0; i < chunks.length; i += batchMaxChunks) { + batches.push(chunks.slice(i, i + batchMaxChunks)); + } + + const result = new Map(); + for (const batch of batches) { + try { + const batchResult = await this.executeBatch(batch); + for (const [id, desc] of batchResult) { + result.set(id, desc); + } + } catch { + // Batch failed β€” individual fallback below handles missing chunks + } + } + + for (let i = 0; i < chunks.length; i++) { + const chunk = chunks[i]!; + if (!result.has(chunk.id)) { + try { + const desc = await this.generateDescription(chunk); + result.set(chunk.id, desc); + } catch { + // skip β€” caller will fall back to content + } + } + } + + return result; + } + + private async executeBatch(chunks: Chunk[]): Promise> { + const contents: GeminiContent[] = [ + { + role: "user", + parts: [{ text: buildBatchUserMessage(chunks) }], + }, + ]; + + const timeoutMs = this.config.batchTimeoutMs ?? 
120000; + const responseText = await this.chatRequest(contents, timeoutMs); + + return parseBatchResponse(responseText, chunks); + } + + private async chatRequest( + contents: GeminiContent[], + timeoutMs: number, + ): Promise { + const baseUrl = this.config.baseUrl.replace(/\/+$/, ""); + const apiKey = this.config.apiKey ?? ""; + const model = this.config.model; + const systemPrompt = this.config.systemPrompt; + + const allParts: Array<{ text: string }> = [{ text: systemPrompt }]; + for (const c of contents) { + allParts.push(...c.parts); + } + + const body: Record = { + contents: [{ role: "user", parts: allParts }], + }; + + const headers: Record = { + "Content-Type": "application/json", + }; + + const url = apiKey + ? `${baseUrl}/models/${model}:generateContent?key=${apiKey}` + : `${baseUrl}/models/${model}:generateContent`; + + const retryMax = this.config.retryMax ?? 3; + const retryBaseDelayMs = this.config.retryBaseDelayMs ?? 1000; + + let lastError: Error | undefined; + for (let attempt = 0; attempt <= retryMax; attempt++) { + const response = await postJson(url, body, headers, timeoutMs); + + if (response.ok) { + const json = (await response.json()) as GeminiResponse; + const text = json.candidates?.[0]?.content?.parts?.[0]?.text; + if (text && text.trim().length > 0) { + return text.trim(); + } + throw new Error(`Gemini returned empty response: ${JSON.stringify(json)}`); + } + + const text = await response.text(); + const error = new Error( + `Gemini LLM request failed (${response.status}): ${text}`, + ); + + if (!RETRYABLE_STATUSES.has(response.status) || attempt === retryMax) { + throw error; + } + + lastError = error; + const delayMs = retryBaseDelayMs * Math.pow(2, attempt); + await sleep(delayMs); + } + + throw lastError ?? 
new Error("Gemini LLM request failed: unknown error"); + } +} + +function sleep(ms: number): Promise { + return new Promise((resolve) => setTimeout(resolve, ms)); +} + +function buildUserMessage(chunk: Chunk): string { + const parts: string[] = []; + + if (chunk.metadata.filePath) { + parts.push(`File: ${chunk.metadata.filePath}`); + } + if (chunk.metadata.language) { + parts.push(`Language: ${chunk.metadata.language}`); + } + parts.push(`Lines: ${chunk.metadata.startLine}-${chunk.metadata.endLine}`); + parts.push(""); + parts.push("```" + (chunk.metadata.language || "")); + parts.push(chunk.content); + parts.push("```"); + + return parts.join("\n"); +} + +function buildBatchUserMessage(chunks: Chunk[]): string { + const first = chunks[0]!; + const parts: string[] = []; + + if (first.metadata.filePath) { + parts.push(`File: ${first.metadata.filePath}`); + } + if (first.metadata.language) { + parts.push(`Language: ${first.metadata.language}`); + } + parts.push(`Chunks: ${chunks.length}`); + parts.push(""); + + for (let i = 0; i < chunks.length; i++) { + const chunk = chunks[i]!; + const lang = chunk.metadata.language || ""; + parts.push(`=== CHUNK ${i} (lines ${chunk.metadata.startLine}-${chunk.metadata.endLine}) ===`); + parts.push("```" + lang); + parts.push(chunk.content); + parts.push("```"); + parts.push(""); + } + + return parts.join("\n"); +} + +function parseBatchResponse(text: string, chunks: Chunk[]): Map { + const result = new Map(); + const chunkPattern = /^CHUNK\s+(\d+)\s*[):-]?\s*/i; + + const lines = text.split("\n"); + let currentIndex: number | null = null; + let currentDesc: string[] = []; + + for (const line of lines) { + const match = chunkPattern.exec(line.trim()); + if (match) { + if (currentIndex !== null && currentDesc.length > 0) { + const desc = currentDesc.join(" ").trim(); + if (desc.length > 0 && currentIndex >= 0 && currentIndex < chunks.length) { + result.set(chunks[currentIndex]!.id, desc); + } + } + currentIndex = parseInt(match[1]!, 
10); + currentDesc = [line.slice(match[0]!.length).trim()]; + } else if (currentIndex !== null) { + currentDesc.push(line.trim()); + } + } + + if (currentIndex !== null && currentDesc.length > 0) { + const desc = currentDesc.join(" ").trim(); + if (desc.length > 0 && currentIndex >= 0 && currentIndex < chunks.length) { + result.set(chunks[currentIndex]!.id, desc); + } + } + + return result; +} diff --git a/src/embedder/cohere.ts b/src/embedder/cohere.ts new file mode 100644 index 0000000..ca8e235 --- /dev/null +++ b/src/embedder/cohere.ts @@ -0,0 +1,52 @@ +import type { EmbeddingProvider } from "../core/interfaces.js"; +import type { ProxyConfig } from "../core/config.js"; +import { postJson } from "./http.js"; + +export class CohereProvider implements EmbeddingProvider { + readonly name = "cohere"; + + private baseUrl: string; + private model: string; + private apiKey: string; + private timeoutMs: number; + private proxy?: ProxyConfig; + + constructor(baseUrl: string, model: string, apiKey: string, timeoutMs: number = 30000, proxy?: ProxyConfig) { + this.baseUrl = baseUrl.replace(/\/+$/, ""); + this.model = model; + this.apiKey = apiKey; + this.timeoutMs = timeoutMs; + this.proxy = proxy; + } + + async embed(texts: string[], purpose?: "query" | "document"): Promise { + const inputType = purpose === "query" ? 
"search_query" : "search_document"; + const body: Record = { + texts, + model: this.model, + input_type: inputType, + }; + const response = await postJson( + `${this.baseUrl}/embed`, + body, + { Authorization: `Bearer ${this.apiKey}` }, + this.timeoutMs, + this.proxy, + ); + + if (!response.ok) { + const body = await response.text(); + throw new Error(`Cohere embedding failed (${response.status}): ${body}`); + } + + const json = (await response.json()) as { + embeddings?: number[][]; + }; + + if (!json.embeddings || !Array.isArray(json.embeddings)) { + throw new Error(`Cohere: unexpected response: ${JSON.stringify(json)}`); + } + + return json.embeddings; + } +} diff --git a/src/embedder/factory.ts b/src/embedder/factory.ts index 94d887b..aa61b57 100644 --- a/src/embedder/factory.ts +++ b/src/embedder/factory.ts @@ -1,23 +1,33 @@ import type { EmbeddingProvider } from "../core/interfaces.js"; import type { RagConfig } from "../core/config.js"; +import { isOpenAiCompatible } from "../core/provider-defaults.js"; import { OllamaProvider } from "./ollama.js"; import { OpenAIProvider } from "./openai.js"; +import { CohereProvider } from "./cohere.js"; export function createEmbedder(config: RagConfig): EmbeddingProvider { const { provider, baseUrl, model, apiKey, proxy, timeoutMs } = config.embedding; const effectiveTimeoutMs = timeoutMs ?? 
30000; - switch (provider) { - case "ollama": - return new OllamaProvider(baseUrl, model, apiKey, effectiveTimeoutMs, proxy); - case "openai": - if (!apiKey) { - throw new Error("OpenAI provider requires an apiKey"); - } - return new OpenAIProvider(baseUrl, model, apiKey, effectiveTimeoutMs, proxy); - default: - throw new Error(`Unknown embedding provider: ${provider}`); + if (provider === "ollama") { + return new OllamaProvider(baseUrl, model, apiKey, effectiveTimeoutMs, proxy); } + + if (provider === "cohere") { + if (!apiKey) { + throw new Error("Cohere provider requires an apiKey"); + } + return new CohereProvider(baseUrl, model, apiKey, effectiveTimeoutMs, proxy); + } + + if (isOpenAiCompatible(provider)) { + if (!apiKey) { + throw new Error(`${provider} provider requires an apiKey`); + } + return new OpenAIProvider(baseUrl, model, apiKey, effectiveTimeoutMs, proxy); + } + + throw new Error(`Unknown embedding provider: ${provider}`); } export async function embedBatch( diff --git a/src/embedder/openai.ts b/src/embedder/openai.ts index 68d4b9a..505212b 100644 --- a/src/embedder/openai.ts +++ b/src/embedder/openai.ts @@ -2,6 +2,18 @@ import type { EmbeddingProvider } from "../core/interfaces.js"; import type { ProxyConfig } from "../core/config.js"; import { postJson } from "./http.js"; +/** + * Map of provider base URLs to their OpenAI-compatible API hostnames. + * Used to determine provider-specific API quirks like input_type values. 
+ */ +function inferProviderName(baseUrl: string): string { + const host = baseUrl.toLowerCase(); + if (host.includes("nvidia") || host.includes("api.nvidia.com") || host.includes("integrate.api.nvidia.com")) { + return "nvidia"; + } + return "openai"; // default β€” most providers follow OpenAI conventions +} + export class OpenAIProvider implements EmbeddingProvider { readonly name = "openai"; @@ -10,6 +22,7 @@ export class OpenAIProvider implements EmbeddingProvider { private apiKey: string; private timeoutMs: number; private proxy?: ProxyConfig; + private provider: string; constructor(baseUrl: string, model: string, apiKey: string, timeoutMs: number = 30000, proxy?: ProxyConfig) { this.baseUrl = baseUrl.replace(/\/+$/, ""); @@ -17,12 +30,25 @@ export class OpenAIProvider implements EmbeddingProvider { this.apiKey = apiKey; this.timeoutMs = timeoutMs; this.proxy = proxy; + this.provider = inferProviderName(this.baseUrl); + } + + /** + * Convert the generic purpose to the provider-specific input_type value. + * - OpenAI: document β†’ "document", query β†’ "query" (already correct) + * - NVIDIA: document β†’ "passage", query β†’ "query" (NVIDIA uses "passage" not "document") + */ + private toInputType(purpose: "query" | "document"): string { + if (this.provider === "nvidia") { + return purpose === "document" ? 
"passage" : "query"; + } + return purpose; } async embed(texts: string[], purpose?: "query" | "document"): Promise { const body: Record = { model: this.model, input: texts }; if (purpose) { - body.input_type = purpose; + body.input_type = this.toInputType(purpose); } const response = await postJson( `${this.baseUrl}/embeddings`, diff --git a/src/plugin.ts b/src/plugin.ts index e95347c..aa5b8fb 100644 --- a/src/plugin.ts +++ b/src/plugin.ts @@ -11,6 +11,7 @@ import { appendDebugLog } from "./core/fileLogger.js"; import { loadRuntimeOverrides, applyRuntimeOverrides } from "./core/runtime-overrides.js"; import { createBackgroundIndexer } from "./watcher.js"; import { createRagReadTool } from "./opencode/create-read-tool.js"; +import { resolveApiKey } from "./core/resolve-api-key.js"; import { existsSync } from "node:fs"; import path from "node:path"; @@ -662,6 +663,8 @@ async function loadKeywordIndex(storePath: string, logFilePath: string): Promise } } + + export const ragPlugin: Plugin = async ( input: PluginInput, _options?: Record @@ -675,6 +678,27 @@ export const ragPlugin: Plugin = async ( const storePath = path.resolve(input.directory, cfg.vectorStore.path); + // Apply runtime overrides before creating services + const overrides = loadRuntimeOverrides(storePath); + const effectiveCfg = applyRuntimeOverrides(cfg, overrides); + + // Resolve API keys from env vars or OpenCode provider config if not set in opencode-rag.json + const hadEmbeddingKey = !!effectiveCfg.embedding.apiKey; + const hadDescriptionKey = !!effectiveCfg.description?.apiKey; + resolveApiKey(effectiveCfg, input.directory); + if (!hadEmbeddingKey && effectiveCfg.embedding.apiKey) { + appendDebugLog(logFilePath, { + scope: "plugin", + message: `Resolved OpenAI API key for embedding from ${process.env.OPENAI_API_KEY ? 
"OPENAI_API_KEY env var" : "OpenCode provider config"}`, + }); + } + if (!hadDescriptionKey && effectiveCfg.description?.apiKey) { + appendDebugLog(logFilePath, { + scope: "plugin", + message: `Resolved OpenAI API key for description from ${process.env.OPENAI_API_KEY ? "OPENAI_API_KEY env var" : "OpenCode provider config"}`, + }); + } + // Close existing indexer for this directory if one exists (e.g. on plugin reload) const existingIndexer = backgroundIndexers.get(input.directory); if (existingIndexer) { @@ -696,10 +720,10 @@ export const ragPlugin: Plugin = async ( }); // Probe vector dimension and create store with correct dimension - const embedder = createEmbedder(cfg); + const embedder = createEmbedder(effectiveCfg); let vectorDimension = 384; try { - const probe = await embedder.embed(["dimension-probe"]); + const probe = await embedder.embed(["dimension-probe"], "query"); if (probe && probe[0] && probe[0].length > 0 && typeof probe[0][0] === "number") { vectorDimension = (probe[0] as number[]).length; } @@ -721,13 +745,13 @@ export const ragPlugin: Plugin = async ( const keywordIndex = await loadKeywordIndex(storePath, logFilePath); // Create description provider (enabled by default) - const descriptionConfig = cfg.description ?? { enabled: true, provider: "ollama" as const, baseUrl: "http://127.0.0.1:11434/api", model: "qwen2.5:3b", systemPrompt: "" }; + const descriptionConfig = effectiveCfg.description ?? { enabled: true, provider: "ollama" as const, baseUrl: "http://127.0.0.1:11434/api", model: "qwen2.5:3b", systemPrompt: "" }; const descriptionProvider = descriptionConfig.enabled ? createDescriptionProvider(descriptionConfig) : undefined; const hooks = createRagHooks({ - cfg, + cfg: effectiveCfg, storePath, logFilePath, worktree: input.directory, @@ -738,12 +762,12 @@ export const ragPlugin: Plugin = async ( }); // Start background auto-indexer if enabled - const autoIndexCfg = cfg.openCode.autoIndex ?? 
{ enabled: true, debounceMs: 5000, intervalMs: 300000 }; + const autoIndexCfg = effectiveCfg.openCode.autoIndex ?? { enabled: true, debounceMs: 5000, intervalMs: 300000 }; if (autoIndexCfg.enabled) { const indexer = createBackgroundIndexer({ cwd: input.directory, storePath, - config: cfg, + config: effectiveCfg, store, embedder, logFilePath, diff --git a/src/tui.ts b/src/tui.ts index 5217521..fe0a9ed 100644 --- a/src/tui.ts +++ b/src/tui.ts @@ -4,7 +4,9 @@ import { createElement, insert, setProp } from "@opentui/solid"; import { readFileSync, existsSync, writeFileSync } from "node:fs"; import { dirname, join, resolve } from "node:path"; import { fileURLToPath } from "node:url"; +import type { Provider } from "@opencode-ai/sdk/v2"; import { loadRuntimeOverrides, saveRuntimeOverride } from "./core/runtime-overrides.js"; +import { PROVIDER_DEFAULTS } from "./core/provider-defaults.js"; let _version: string | undefined; function getVersion(): string { @@ -208,8 +210,9 @@ function readJsonFile>(filePath: string): T | undefi type SettingEntry = { path: string[]; label: string; - type: "boolean" | "number"; - currentValue: boolean | number; + type: "boolean" | "number" | "string"; + currentValue: boolean | number | string; + options?: { title: string; value: string; description?: string; category?: string }[]; }; type SettingCategory = { @@ -218,9 +221,105 @@ type SettingCategory = { entries: SettingEntry[]; }; +function buildModelOptions(providers: readonly Provider[]): { title: string; value: string; description?: string; category?: string }[] { + const options: { title: string; value: string; description?: string; category?: string }[] = []; + for (const provider of providers) { + if (!provider.models) continue; + for (const [modelId, model] of Object.entries(provider.models)) { + options.push({ + title: model.name ?? 
modelId, + value: `${provider.id}/${modelId}`, + description: provider.name, + category: provider.name, + }); + } + } + options.sort((a, b) => { + if ((a.category ?? "") < (b.category ?? "")) return -1; + if ((a.category ?? "") > (b.category ?? "")) return 1; + return (a.title ?? "").localeCompare(b.title ?? ""); + }); + options.push({ title: "Custom\u2026", value: "__custom__", description: "Enter provider/model manually" }); + return options; +} + +function providerIdToRagProvider(providerId: string): string { + if (providerId === "ollama") return "ollama"; + const defaults = PROVIDER_DEFAULTS[providerId]; + if (defaults) return providerId; + return "openai"; +} + +function resolveProviderBaseUrl(provider: Provider): string { + const baseUrl = (provider.options?.baseURL as string) ?? ""; + if (provider.id === "ollama") { + const clean = baseUrl.replace(/\/+$/, ""); + return clean ? `${clean}/api` : PROVIDER_DEFAULTS.ollama!.defaultBaseUrl + "/api"; + } + const defaults = PROVIDER_DEFAULTS[provider.id]; + return baseUrl || (defaults?.defaultBaseUrl ?? "https://api.openai.com/v1"); +} + +function saveConfigValue(configPath: string, path: string[], value: unknown): void { + try { + const data: Record = JSON.parse(readFileSync(configPath, "utf-8")); + let target = data; + for (let i = 0; i < path.length - 1; i++) { + const key = path[i]!; + if (!target[key] || typeof target[key] !== "object") { + target[key] = {}; + } + target = target[key] as Record; + } + target[path[path.length - 1]!] 
= value; + writeFileSync(configPath, JSON.stringify(data, null, 2), "utf-8"); + } catch { + // silently ignore write errors + } +} + +function saveModelSelection( + storePath: string, + configPath: string, + selectionValue: string, + path: string[], + providers?: readonly Provider[] +): string | undefined { + const section = path[0]!; + if (selectionValue === "__custom__") return undefined; + + const parts = selectionValue.split("/"); + if (parts.length < 2) return undefined; + + const providerId = parts[0]!; + const modelId = parts.slice(1).join("/"); + + const provider = providers?.find((p) => p.id === providerId); + const ragProvider = providerIdToRagProvider(providerId); + const baseUrl = provider ? resolveProviderBaseUrl(provider) : ""; + + saveRuntimeOverride(storePath, [section, "provider"], ragProvider); + saveConfigValue(configPath, [section, "provider"], ragProvider); + saveRuntimeOverride(storePath, [section, "model"], modelId); + saveConfigValue(configPath, [section, "model"], modelId); + if (baseUrl) { + saveRuntimeOverride(storePath, [section, "baseUrl"], baseUrl); + saveConfigValue(configPath, [section, "baseUrl"], baseUrl); + } + + const apiKey = (provider?.options?.apiKey as string) ?? ""; + if (apiKey) { + saveRuntimeOverride(storePath, [section, "apiKey"], apiKey); + saveConfigValue(configPath, [section, "apiKey"], apiKey); + } + + return selectionValue; +} + function buildSettingCategories( cfg: Record, ro: Record, + providers?: readonly Provider[], ): SettingCategory[] { const retrievalCfg = (cfg.retrieval ?? {}) as Record; const retrievalRo = (ro.retrieval ?? {}) as Record; @@ -237,6 +336,17 @@ function buildSettingCategories( const descCfg = (cfg.description ?? {}) as Record; const descRo = (ro.description ?? {}) as Record; + const embeddingCfg = (cfg.embedding ?? {}) as Record; + const embeddingRo = (ro.embedding ?? {}) as Record; + + const modelOptions = providers ? 
buildModelOptions(providers) : undefined; + + function displayModel(roProvider: unknown, roModel: unknown, cfgProvider: unknown, cfgModel: unknown, defaultProvider: string, defaultModel: string): string { + const p = (roProvider as string) ?? (cfgProvider as string) ?? defaultProvider; + const m = (roModel as string) ?? (cfgModel as string) ?? defaultModel; + return `${p}/${m}`; + } + return [ { id: "retrieval", @@ -310,6 +420,19 @@ function buildSettingCategories( }, ], }, + { + id: "embedding", + label: "Embedding", + entries: [ + { + path: ["embedding", "model"], + label: "Model", + type: "string", + currentValue: displayModel(embeddingRo.provider, embeddingRo.model, embeddingCfg.provider, embeddingCfg.model, "ollama", "embeddinggemma:latest"), + options: modelOptions, + }, + ], + }, { id: "description", label: "LLM Descriptions", @@ -320,6 +443,13 @@ function buildSettingCategories( type: "boolean", currentValue: (descRo.enabled as boolean) ?? (descCfg.enabled as boolean) ?? true, }, + { + path: ["description", "model"], + label: "Model", + type: "string", + currentValue: displayModel(descRo.provider, descRo.model, descCfg.provider, descCfg.model, "ollama", "qwen2.5:3b"), + options: modelOptions, + }, ], }, ]; @@ -335,7 +465,7 @@ async function openSettingsDialog(api: { // eslint-disable-next-line @typescript-eslint/no-explicit-any toast: (input: any) => void; }; - state: { path: { worktree: string | undefined } }; + state: { path: { worktree: string | undefined }; provider?: readonly Provider[] }; }): Promise { const worktree = api.state.path.worktree; if (!worktree) return; @@ -356,15 +486,15 @@ async function openSettingsDialog(api: { const vs = cfg.vectorStore as Record | undefined; const storeRelPath = (vs?.path as string) ?? 
".opencode/rag_db"; const storePath = resolve(worktree, storeRelPath); + const providers = api.state.provider; function getCurrentOverrides(): Record { return loadRuntimeOverrides(storePath) as unknown as Record; } - // Build options for a category or setting list function showCategoryMenu(): void { const ro = getCurrentOverrides(); - const cats = buildSettingCategories(cfg, ro); + const cats = buildSettingCategories(cfg, ro, providers); const options = [ ...cats.map((c) => ({ title: c.label, @@ -397,7 +527,7 @@ async function openSettingsDialog(api: { ...cat.entries.map((s) => ({ title: `${s.label}: ${s.type === "boolean" ? (s.currentValue ? "Yes" : "No") : String(s.currentValue)}`, value: s.path.join("."), - description: s.type === "boolean" ? "Select to toggle" : "Select to edit", + description: s.options ? "Select to open model picker" : (s.type === "boolean" ? "Select to toggle" : "Select to edit"), })), { title: "\u2190 Back", value: "__back__", description: "Return to categories" }, ]; @@ -416,9 +546,12 @@ async function openSettingsDialog(api: { const entry = cat.entries.find((s) => s.path.join(".") === option.value); if (!entry) return; - if (entry.type === "boolean") { + if (entry.options) { + showModelPicker(entry, cat); + } else if (entry.type === "boolean") { const newVal = !entry.currentValue; saveRuntimeOverride(storePath, entry.path, newVal); + saveConfigValue(configPath!, entry.path, newVal); api.ui.toast({ variant: "success", title: "Settings", @@ -426,7 +559,7 @@ async function openSettingsDialog(api: { }); entry.currentValue = newVal; showSettingMenu(cat); - } else { + } else if (entry.type === "number") { api.ui.dialog.replace( () => api.ui.DialogPrompt({ @@ -437,6 +570,7 @@ async function openSettingsDialog(api: { const num = parseFloat(input); if (!isNaN(num)) { saveRuntimeOverride(storePath, entry.path, num); + saveConfigValue(configPath!, entry.path, num); api.ui.toast({ variant: "success", title: "Settings", @@ -451,7 +585,84 @@ async 
function openSettingsDialog(api: { }, }), ); + } else { + api.ui.dialog.replace( + () => + api.ui.DialogPrompt({ + title: `Edit ${entry.label}`, + placeholder: "Enter new value", + value: String(entry.currentValue), + onConfirm: (input: string) => { + saveRuntimeOverride(storePath, entry.path, input); + saveConfigValue(configPath!, entry.path, input); + api.ui.toast({ + variant: "success", + title: "Settings", + message: `${entry.label}: ${input}`, + }); + entry.currentValue = input; + showSettingMenu(cat); + }, + onCancel: () => { + showSettingMenu(cat); + }, + }), + ); + } + }, + }), + ); + } + + function showModelPicker(entry: SettingEntry, cat: SettingCategory): void { + api.ui.dialog.replace( + () => + api.ui.DialogSelect({ + title: `Select ${entry.label}`, + placeholder: "Search models\u2026", + options: entry.options ?? [], + onSelect: (option: { title: string; value: string }) => { + if (option.value === "__custom__") { + api.ui.dialog.replace( + () => + api.ui.DialogPrompt({ + title: `Custom ${entry.label}`, + placeholder: "e.g. ollama/my-model or openai/custom-model", + value: typeof entry.currentValue === "string" ? 
entry.currentValue : "", + onConfirm: (input: string) => { + const saved = saveModelSelection(storePath, configPath!, input, entry.path, providers); + if (saved) { + entry.currentValue = saved; + } else if (input) { + saveRuntimeOverride(storePath, entry.path, input); + saveConfigValue(configPath!, entry.path, input); + entry.currentValue = input; + } + showSettingMenu(cat); + }, + onCancel: () => showSettingMenu(cat), + }), + ); + return; + } + const saved = saveModelSelection(storePath, configPath!, option.value, entry.path, providers); + if (saved) { + entry.currentValue = saved; + api.ui.toast({ + variant: "success", + title: "Settings", + message: `${entry.label}: ${saved}`, + }); + const isEmbedding = entry.path[0] === "embedding"; + if (isEmbedding) { + api.ui.toast({ + variant: "warning", + title: "Settings", + message: "Embedding changed. Re-index may be required. Restart OpenCode for changes.", + }); + } } + showSettingMenu(cat); }, }), );