Skip to content

Commit 9fcff66

Browse files
indexzero and claude authored
feat(cache) add local directory storage driver for virtual cache mounts (#23)
Add LocalDirStorageDriver that implements the storage driver interface for reading packument JSON files from local directories. This enables mounting existing datasets as virtual caches without importing them.

Storage driver changes:
- LocalDirStorageDriver with list/get/has (read-only)
- createStorageDriver({ LOCAL_DIR: path }) factory support
- isLocalPath() utility for origin detection

Architecture improvements:
- View query.js simplified to use cache directly
- Removed origin-adapter abstraction from view module
- Driver selection moved to CLI layer where origin is known

Usage: views with `registry: "./path/to/packuments"` now work transparently, enabling analysis of local datasets alongside registry-cached data.

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
1 parent d685fd1 commit 9fcff66

8 files changed

Lines changed: 498 additions & 8 deletions

File tree

cli/cli/src/cmd/view/query.js

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import { ViewStore, queryView, countView, collectView } from '@_all_docs/view';
2-
import { Cache, createStorageDriver } from '@_all_docs/cache';
2+
import { Cache, createStorageDriver, isLocalPath } from '@_all_docs/cache';
33

44
export const usage = `Usage: _all_docs view query <name> [options]
55
@@ -43,8 +43,12 @@ export const command = async (cli) => {
4343
process.exit(1);
4444
}
4545

46-
const driver = await createStorageDriver({ CACHE_DIR: cli.dir('packuments') });
47-
const cache = new Cache({ path: cli.dir('packuments'), driver });
46+
// Create appropriate storage driver based on view's origin
47+
const origin = view.registry || view.origin;
48+
const driver = isLocalPath(origin)
49+
? await createStorageDriver({ LOCAL_DIR: origin })
50+
: await createStorageDriver({ CACHE_DIR: cli.dir('packuments') });
51+
const cache = new Cache({ path: origin, driver });
4852

4953
const options = {
5054
limit: cli.values.limit ? parseInt(cli.values.limit, 10) : undefined,

doc/cli-reference.md

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -616,6 +616,95 @@ npx _all_docs cache create-index > previous-index.txt
616616

617617
---
618618

619+
## view
620+
621+
Commands for defining and querying views over cached data.
622+
623+
Views can be created over two types of origins:
624+
- **Registry cache**: Data fetched from npm or other registries
625+
- **Local directory**: JSON packument files in a directory
626+
627+
### view define
628+
629+
Define a named view for querying packuments.
630+
631+
```bash
632+
npx _all_docs view define <name> [options]
633+
```
634+
635+
**Options:**
636+
- `--origin <origin>` - Data origin: encoded name (npm), URL, or local path
637+
- `--registry <url>` - Registry URL (alternative to origin)
638+
- `--select <expr>` - Field selection expression
639+
- `--type <type>` - Entity type: packument, partition (default: packument)
640+
- `--force`, `-f` - Overwrite existing view definition
641+
642+
**Origin Types:**
643+
644+
| Type | Example | Description |
645+
|------|---------|-------------|
646+
| Encoded name | `npm` | Pre-defined registry origin |
647+
| Registry URL | `https://npm.example.com` | Custom registry |
648+
| Local path | `./local-data/` | Directory of JSON files |
649+
| file:// URL | `file:///data/archive/` | Explicit file URL |
650+
651+
**Examples:**
652+
653+
```bash
654+
# Define view over npm registry cache
655+
npx _all_docs view define npm-pkgs --origin npm
656+
657+
# Define view over local directory of packuments
658+
npx _all_docs view define local-snapshot --origin ./local-packuments/
659+
660+
# Using file:// URL for local directory
661+
npx _all_docs view define archive --origin file:///data/npm-archive/
662+
```
663+
664+
### view query
665+
666+
Query a defined view and output results.
667+
668+
```bash
669+
npx _all_docs view query <name> [options]
670+
```
671+
672+
**Options:**
673+
- `--limit <n>` - Maximum records to return
674+
- `--filter <expr>` - Filter expression
675+
- `--count` - Only output the count of matching records
676+
677+
### view join
678+
679+
Join two views on a common key.
680+
681+
```bash
682+
npx _all_docs view join <left> <right> [options]
683+
```
684+
685+
This enables comparing packages across different sources:
686+
687+
```bash
688+
# Compare npm cache against local snapshot
689+
npx _all_docs view define npm --origin npm
690+
npx _all_docs view define snapshot --origin ./snapshot/
691+
npx _all_docs view join npm snapshot --diff --select 'name'
692+
```
693+
694+
### view list
695+
696+
List all defined views.
697+
698+
### view show
699+
700+
Show details of a defined view.
701+
702+
### view delete
703+
704+
Delete a defined view.
705+
706+
---
707+
619708
## Troubleshooting
620709

621710
### Common Issues

src/cache/index.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,5 @@ export { BaseHTTPClient, createAgent, createDispatcher } from './http.js';
33
export { CacheEntry } from './entry.js';
44
export { createCacheKey, decodeCacheKey, createPartitionKey, createPackumentKey, encodeOrigin } from './cache-key.js';
55
export { PartitionCheckpoint } from './checkpoint.js';
6-
export { createStorageDriver } from './storage-driver.js';
6+
export { createStorageDriver, LocalDirStorageDriver, isLocalPath } from './storage-driver.js';
77
export { AuthError, TempError, PermError, categorizeHttpError } from './errors.js';

src/cache/local-dir-driver.js

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,159 @@
1+
/**
2+
* Local directory storage driver - reads packument JSON files from a directory
3+
*
4+
* This is a read-only storage driver that allows mounting existing directories
5+
* of packument JSON files as a virtual cache. Useful for analyzing local datasets
6+
* without importing them into the cache.
7+
*/
8+
import { existsSync } from 'node:fs';
import { readdir, readFile, access } from 'node:fs/promises';
import { join } from 'node:path';
import { fileURLToPath } from 'node:url';
11+
12+
/**
 * Check if an origin string represents a local path
 *
 * Recognized forms: `file://` URLs, Unix absolute paths, relative paths
 * starting with `./` or `../`, and Windows drive-letter paths (`C:\`, `D:/`).
 * Any other string is treated as local only if it names something that
 * currently exists on disk.
 *
 * @param {string} origin - Origin string (URL, encoded origin, or path)
 * @returns {boolean} True if origin is a local path
 */
export function isLocalPath(origin) {
  // Anything that is not a non-empty string (undefined, null, numbers, URL
  // objects, ...) is never a local path; guarding here keeps the string
  // checks below from throwing a TypeError.
  if (typeof origin !== 'string' || origin === '') return false;

  const hasLocalPrefix =
    origin.startsWith('file://') || // file:// URL
    origin.startsWith('/') || // Absolute path (Unix)
    origin.startsWith('./') || // Relative path
    origin.startsWith('../') ||
    /^[A-Za-z]:[\\/]/.test(origin); // Windows absolute path (C:\, D:\, etc.)
  if (hasLocalPrefix) return true;

  // Fallback: a bare name that exists on disk. This check is resolved against
  // the current working directory, so its result can differ between runs.
  return existsSync(origin);
}
37+
38+
/**
 * Normalize origin to a filesystem path
 *
 * `file://` URLs are converted with `fileURLToPath`, which decodes
 * percent-escapes (`%20` becomes a space) and applies platform path rules.
 * Every other origin is returned unchanged.
 *
 * @param {string} origin - Origin string
 * @returns {string} Filesystem path
 */
function normalizePath(origin) {
  if (!origin.startsWith('file://')) {
    return origin;
  }

  try {
    return fileURLToPath(origin);
  } catch {
    // Not a convertible file URL (e.g. `file://./relative`, which parses with
    // a host component). Fall back to stripping the scheme prefix so such
    // inputs keep resolving the way they did before.
    return origin.replace('file://', '');
  }
}
49+
50+
/**
 * Resolve a storage key to the path of its JSON file inside a directory
 * @param {string} dirPath - Directory containing the JSON files
 * @param {string} key - Filename (with or without .json extension)
 * @returns {string} Path to the JSON file for the key
 */
function resolveKeyPath(dirPath, key) {
  const filename = key.endsWith('.json') ? key : `${key}.json`;
  return join(dirPath, filename);
}

/**
 * Read-only storage driver for local directories of JSON files
 *
 * Keys are filenames inside the directory; the `.json` extension is optional
 * on lookups. All mutating operations throw.
 */
export class LocalDirStorageDriver {
  /**
   * @param {string} dirPath - Path to directory containing packument JSON files
   */
  constructor(dirPath) {
    this.dirPath = normalizePath(dirPath);
    this.supportsBatch = false;
    this.supportsBloom = false;
  }

  /**
   * Get a packument by key (filename)
   * @param {string} key - Filename (with or without .json extension)
   * @returns {Promise<object>} Parsed JSON content
   * @throws {Error} If file not found or invalid JSON
   */
  async get(key) {
    const filePath = resolveKeyPath(this.dirPath, key);

    try {
      const content = await readFile(filePath, 'utf8');
      return JSON.parse(content);
    } catch (error) {
      if (error.code === 'ENOENT') {
        // Keep the underlying fs error reachable for debugging
        throw new Error(`Key not found: ${key}`, { cause: error });
      }
      throw error;
    }
  }

  /**
   * Check if a key exists
   * @param {string} key - Filename (with or without .json extension)
   * @returns {Promise<boolean>}
   */
  async has(key) {
    try {
      await access(resolveKeyPath(this.dirPath, key));
      return true;
    } catch {
      // Any access failure is reported as "not present"
      return false;
    }
  }

  /**
   * List all JSON files in the directory
   * Note: prefix is ignored for local directories since the directory path
   * itself serves as the namespace isolation.
   * @param {string} [_prefix] - Ignored for local directories
   * @yields {string} Filenames
   */
  async *list(_prefix) {
    const files = await readdir(this.dirPath);
    for (const file of files) {
      if (file.endsWith('.json')) {
        yield file;
      }
    }
  }

  /**
   * Put is not supported - this is a read-only driver
   * @throws {Error} Always throws
   */
  async put(_key, _value) {
    throw new Error('LocalDirStorageDriver is read-only');
  }

  /**
   * Delete is not supported - this is a read-only driver
   * @throws {Error} Always throws
   */
  async delete(_key) {
    throw new Error('LocalDirStorageDriver is read-only');
  }

  /**
   * Clear is not supported - this is a read-only driver
   * @throws {Error} Always throws
   */
  async clear() {
    throw new Error('LocalDirStorageDriver is read-only');
  }

  /**
   * Batch put is not supported - this is a read-only driver
   * @throws {Error} Always throws
   */
  async putBatch(_entries) {
    throw new Error('LocalDirStorageDriver is read-only');
  }

  /**
   * Get metadata info for a file (basic implementation)
   * @param {string} key - Filename (with or without .json extension)
   * @returns {Promise<object|null>} `{ key, path }` or null if not found
   */
  async info(key) {
    const exists = await this.has(key);
    if (!exists) return null;
    // Report the same file that has()/get() resolve for this key, so the
    // path is valid even when the key was given without the .json extension.
    return { key, path: resolveKeyPath(this.dirPath, key) };
  }
}

src/cache/storage-driver.js

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,21 @@
1+
import { LocalDirStorageDriver, isLocalPath } from './local-dir-driver.js';
2+
3+
export { LocalDirStorageDriver, isLocalPath };
4+
15
/**
26
* Creates a storage driver based on the runtime environment
37
* @param {Object} env - Environment configuration
8+
* @param {string} [env.LOCAL_DIR] - Local directory path (read-only driver)
9+
* @param {string} [env.CACHE_DIR] - Cache directory path
10+
* @param {string} [env.RUNTIME] - Runtime environment (node, cloudflare, fastly, cloudrun)
411
* @returns {Object} Storage driver instance
512
*/
613
export async function createStorageDriver(env) {
14+
// Local directory takes precedence - it's a read-only mount
15+
if (env?.LOCAL_DIR) {
16+
return new LocalDirStorageDriver(env.LOCAL_DIR);
17+
}
18+
719
const runtime = env?.RUNTIME || 'node';
820

921
switch (runtime) {

0 commit comments

Comments
 (0)