Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,4 +46,4 @@

## Security & Configuration Tips
- Never commit secrets. The default local provider needs no key; OpenAI is opt-in.
- The index/database live in `CODERAG_STORE_DIR` (default `./.coderag/`, gitignored).
- The index/database live in `CODERAG_STORE_DIR` (default `<watched-dir>/.coderag/`, derived from the watched dir rather than the cwd, gitignored).
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,8 @@ coderag search "where are duplicate vectors removed on file change" --watched-di
"""Incremental indexing orchestration. ...the critical correctness property…"""
```

By default the index lives in `./.coderag/`. Set `CODERAG_WATCHED_DIR` / `CODERAG_STORE_DIR`
By default the index lives in `<watched-dir>/.coderag/` (next to the code it indexes, so it's
found no matter where you run `coderag` from). Set `CODERAG_WATCHED_DIR` / `CODERAG_STORE_DIR`
(or copy `example.env` to `.env`) to avoid repeating flags.

## 🧑‍💻 The surfaces
Expand Down Expand Up @@ -384,7 +385,7 @@ table is in [`docs/configuration.md`](docs/configuration.md).
| `CODERAG_PROVIDER` | `fastembed` | Embedding backend: `fastembed` (local) · `openai` (OpenAI API **or** any OpenAI-compatible/local server) · `fake` |
| `CODERAG_MODEL` | `BAAI/bge-small-en-v1.5` | Local embedding model (`coderag eval --list-models`) |
| `CODERAG_WATCHED_DIR` | cwd | Codebase to index |
| `CODERAG_STORE_DIR` | `./.coderag` | Where the LanceDB store lives |
| `CODERAG_STORE_DIR` | `<watched-dir>/.coderag` | Where the LanceDB store lives (derived from the watched dir, not the cwd) |
| `CODERAG_TOP_K` | `8` | Results returned |
| `OPENAI_BASE_URL` | – | Point at a self-hosted / local OpenAI-compatible server (Ollama, vLLM, LM Studio, LocalAI) — enables local embeddings **and** local answers |
| `OPENAI_API_KEY` | – | OpenAI **cloud** embeddings / answers (optional for a local server) |
Expand Down
49 changes: 46 additions & 3 deletions coderag/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,18 @@ def _env_path(key: str, default: Path) -> Path:
return Path(raw).expanduser()


def _env_path_opt(key: str) -> Path | None:
"""Like :func:`_env_path` but returns ``None`` when the var is unset/blank.

Used for ``store_dir`` so an absent ``CODERAG_STORE_DIR`` leaves it unset and lets it be
derived from ``watched_dir`` (see :meth:`Config.__post_init__`) rather than the cwd.
"""
raw = os.getenv(key)
if raw is None or not raw.strip():
return None
return Path(raw).expanduser()


def _env_tuple(key: str, default: Tuple[str, ...]) -> Tuple[str, ...]:
"""Parse a comma-separated env var into a tuple of trimmed, non-empty values."""
raw = os.getenv(key)
Expand All @@ -139,7 +151,14 @@ class Config:

# --- Locations ---
watched_dir: Path = field(default_factory=Path.cwd)
store_dir: Path = field(default_factory=lambda: Path.cwd() / ".coderag")
# Defaults to ``<watched_dir>/.coderag`` (resolved in ``__post_init__``), so the index
# lives next to the code it indexes. It deliberately does NOT default to the *current
# working directory*: a cwd-relative store silently pointed at a different (often empty)
# ``.coderag`` whenever a command ran from a directory other than the one indexed — e.g.
# ``coderag index --watched-dir /home/me`` would write to ``/home/me/.coderag`` only if
# you happened to be standing in ``/home/me``, and ``coderag status`` from elsewhere then
# found nothing. Set it explicitly with ``--store-dir`` / ``CODERAG_STORE_DIR`` to override.
store_dir: Path = None # type: ignore[assignment] # filled in by __post_init__

# --- What to index ---
languages: Tuple[str, ...] = DEFAULT_LANGUAGES
Expand Down Expand Up @@ -254,8 +273,31 @@ class Config:
demo_max_answers: int = 5 # LLM answers allowed per browser session
demo_cooldown_seconds: int = 20 # minimum seconds between answers in a session

def __post_init__(self) -> None:
    """Fill in ``store_dir`` as ``<watched_dir>/.coderag`` when it was left as ``None``.

    An explicitly supplied ``store_dir`` is kept untouched. Deriving the default from
    ``watched_dir`` (instead of the current working directory) means the index is found
    no matter which directory a command is run from.

    This hook runs on every construction, including the new instance built by
    ``dataclasses.replace()``. It only derives a value when ``store_dir`` arrives as
    ``None``; a non-``None`` ``store_dir`` carried over from an existing instance is left
    as is.
    """
    if self.store_dir is not None:
        return
    derived_store = self.watched_dir / ".coderag"
    # The dataclass is frozen, so plain attribute assignment would raise; go through
    # object.__setattr__ instead.
    object.__setattr__(self, "store_dir", derived_store)

def with_overrides(self, **kwargs: object) -> "Config":
    """Return a copy with the given fields replaced (config stays immutable).

    Special case for the store location: when ``watched_dir`` is overridden, no
    ``store_dir`` is given, and the current ``store_dir`` equals
    ``<current watched_dir>/.coderag`` (i.e. it looks auto-derived), ``store_dir`` is
    passed to ``replace`` as ``None`` so that ``__post_init__`` derives it again from the
    new ``watched_dir``. Any other ``store_dir`` is carried over unchanged.

    The check is by value, so a store that was explicitly set to exactly that default path
    is treated like a derived one and moves too. To keep the store where it is while
    changing ``watched_dir``, pass ``store_dir`` explicitly.

    Args:
        **kwargs: Field names mapped to their replacement values.

    Returns:
        A new ``Config`` built by ``dataclasses.replace``; ``self`` is not modified.
    """
    moves_watched_dir = "watched_dir" in kwargs
    caller_sets_store = "store_dir" in kwargs
    if moves_watched_dir and not caller_sets_store:
        derived_default = self.watched_dir / ".coderag"
        if self.store_dir == derived_default:
            # Copy rather than mutate the caller's mapping; None asks __post_init__ to
            # derive the store against the new watched_dir.
            kwargs = dict(kwargs, store_dir=None)
    return replace(self, **kwargs)  # type: ignore[arg-type]

@classmethod
Expand All @@ -272,7 +314,8 @@ def from_env(cls, **overrides: object) -> "Config":
"CODERAG_CACHE_DIR", Path.home() / ".cache" / "coderag"
),
watched_dir=_env_path("CODERAG_WATCHED_DIR", Path.cwd()),
store_dir=_env_path("CODERAG_STORE_DIR", Path.cwd() / ".coderag"),
# None => derived from watched_dir in __post_init__ (never the cwd).
store_dir=_env_path_opt("CODERAG_STORE_DIR"), # type: ignore[arg-type]
top_k=_env_int("CODERAG_TOP_K", cls.top_k),
fetch_k=_env_int("CODERAG_FETCH_K", cls.fetch_k),
rrf_k=_env_int("CODERAG_RRF_K", cls.rrf_k),
Expand Down
3 changes: 2 additions & 1 deletion coderag/surfaces/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,8 @@ def _env_port(key: str, default: int) -> int:
def _add_common(p: argparse.ArgumentParser) -> None:
p.add_argument("--watched-dir", help="Codebase root to index/search.")
p.add_argument(
"--store-dir", help="Where the index/database live (default ./.coderag)."
"--store-dir",
help="Where the index/database live (default <watched-dir>/.coderag).",
)
p.add_argument(
"--provider",
Expand Down
2 changes: 1 addition & 1 deletion docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ optional.
| Variable | Default | Meaning |
| --- | --- | --- |
| `CODERAG_WATCHED_DIR` | cwd | Codebase to index/search. |
| `CODERAG_STORE_DIR` | `./.coderag` | Where the LanceDB store lives. |
| `CODERAG_STORE_DIR` | `<watched-dir>/.coderag` | Where the LanceDB store lives. Derived from the watched dir (not the cwd) so the index is found no matter where you run `coderag` from. |
| `CODERAG_INDEX_ALL_TEXT` | `false` | Index any UTF-8 text file (docs/config/extensionless), not just code. Binary files are always skipped. |

### Retrieval & quality
Expand Down
6 changes: 4 additions & 2 deletions example.env
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@ CODERAG_MODEL=BAAI/bge-small-en-v1.5
# --- Locations ---
# The codebase to index/search (defaults to the current directory).
CODERAG_WATCHED_DIR=/path/to/your/codebase
# Where the LanceDB store is kept (defaults to ./.coderag).
# CODERAG_STORE_DIR=./.coderag
# Where the LanceDB store is kept. Defaults to <watched-dir>/.coderag — i.e. next to the
# code it indexes, so the index is found no matter which directory you run coderag from
# (not the current working directory). Uncomment to put it somewhere else.
# CODERAG_STORE_DIR=/path/to/your/codebase/.coderag

# --- Retrieval ---
# CODERAG_TOP_K=8
Expand Down
47 changes: 47 additions & 0 deletions tests/test_config_and_providers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,53 @@ def test_config_defaults_and_derived_paths(tmp_path):
assert cfg.store_dir == tmp_path / ".coderag"


def test_store_dir_derives_from_watched_dir_not_cwd(tmp_path, monkeypatch):
    """The store lives under the *watched* dir, not the shell's cwd.

    Regression: a cwd-relative default silently pointed `status`/`search` at a different
    (empty) ``.coderag`` whenever a command ran from a directory other than the indexed one.
    """
    # ``Config.from_env`` reads CODERAG_STORE_DIR, and an explicit store wins over the
    # derived default. Clear any value inherited from the developer's shell or CI so this
    # test exercises the derivation path and not the ambient environment.
    monkeypatch.delenv("CODERAG_STORE_DIR", raising=False)

    watched = tmp_path / "project"
    watched.mkdir()
    elsewhere = tmp_path / "elsewhere"
    elsewhere.mkdir()
    monkeypatch.chdir(elsewhere)  # stand somewhere other than the watched dir

    cfg = Config(watched_dir=watched)
    assert cfg.store_dir == watched / ".coderag"

    # Same result via from_env, and regardless of where the process is launched from.
    cfg_env = Config.from_env(watched_dir=watched)
    assert cfg_env.store_dir == watched / ".coderag"


def test_explicit_store_dir_overrides_derivation(tmp_path):
    """A ``store_dir`` passed to the constructor is kept as given, not derived."""
    project_root = tmp_path / "project"
    custom_store = tmp_path / "custom-store"

    config = Config(store_dir=custom_store, watched_dir=project_root)

    # The explicit value wins; the store is not <watched>/.coderag.
    assert config.store_dir == custom_store


def test_store_dir_from_env_var(tmp_path, monkeypatch):
    """``Config.from_env`` takes the store location from ``CODERAG_STORE_DIR`` when set."""
    env_store = tmp_path / "env-store"
    monkeypatch.setenv("CODERAG_STORE_DIR", str(env_store))

    config = Config.from_env(watched_dir=tmp_path / "project")

    assert config.store_dir == env_store


def test_overriding_watched_dir_moves_derived_store(tmp_path):
    """A derived store follows a re-pointed ``watched_dir``; an explicit store stays put."""
    old_root = tmp_path / "a"
    new_root = tmp_path / "b"

    # Auto-derived store: moves along with the watched dir.
    derived = Config(watched_dir=old_root)
    assert derived.store_dir == old_root / ".coderag"
    relocated = derived.with_overrides(watched_dir=new_root)
    assert relocated.store_dir == new_root / ".coderag"

    # Explicit store: sticky across the same watched_dir move.
    fixed_store = tmp_path / "store"
    pinned = Config(watched_dir=old_root, store_dir=fixed_store)
    repointed = pinned.with_overrides(watched_dir=new_root)
    assert repointed.store_dir == fixed_store


def test_config_is_immutable_and_copies():
cfg = Config()
updated = cfg.with_overrides(top_k=42)
Expand Down
Loading