diff --git a/cli/cli/src/cmd/view/query.js b/cli/cli/src/cmd/view/query.js index 583ff91..e5a803f 100644 --- a/cli/cli/src/cmd/view/query.js +++ b/cli/cli/src/cmd/view/query.js @@ -1,5 +1,5 @@ import { ViewStore, queryView, countView, collectView } from '@_all_docs/view'; -import { Cache, createStorageDriver } from '@_all_docs/cache'; +import { Cache, createStorageDriver, isLocalPath } from '@_all_docs/cache'; export const usage = `Usage: _all_docs view query [options] @@ -43,8 +43,12 @@ export const command = async (cli) => { process.exit(1); } - const driver = await createStorageDriver({ CACHE_DIR: cli.dir('packuments') }); - const cache = new Cache({ path: cli.dir('packuments'), driver }); + // Create appropriate storage driver based on view's origin + const origin = view.registry || view.origin; + const driver = isLocalPath(origin) + ? await createStorageDriver({ LOCAL_DIR: origin }) + : await createStorageDriver({ CACHE_DIR: cli.dir('packuments') }); + const cache = new Cache({ path: origin, driver }); const options = { limit: cli.values.limit ? parseInt(cli.values.limit, 10) : undefined, diff --git a/doc/cli-reference.md b/doc/cli-reference.md index 35f945d..89ef742 100644 --- a/doc/cli-reference.md +++ b/doc/cli-reference.md @@ -616,6 +616,95 @@ npx _all_docs cache create-index > previous-index.txt --- +## view + +Commands for defining and querying views over cached data. + +Views can be created over two types of origins: +- **Registry cache**: Data fetched from npm or other registries +- **Local directory**: JSON packument files in a directory + +### view define + +Define a named view for querying packuments. 
+ +```bash +npx _all_docs view define <name> [options] +``` + +**Options:** +- `--origin <origin>` - Data origin: encoded name (npm), URL, or local path +- `--registry <url>` - Registry URL (alternative to origin) +- `--select <expr>` - Field selection expression +- `--type <type>` - Entity type: packument, partition (default: packument) +- `--force`, `-f` - Overwrite existing view definition + +**Origin Types:** + +| Type | Example | Description | +|------|---------|-------------| +| Encoded name | `npm` | Pre-defined registry origin | +| Registry URL | `https://npm.example.com` | Custom registry | +| Local path | `./local-data/` | Directory of JSON files | +| file:// URL | `file:///data/archive/` | Explicit file URL | + +**Examples:** + +```bash +# Define view over npm registry cache +npx _all_docs view define npm-pkgs --origin npm + +# Define view over local directory of packuments +npx _all_docs view define local-snapshot --origin ./local-packuments/ + +# Using file:// URL for local directory +npx _all_docs view define archive --origin file:///data/npm-archive/ +``` + +### view query + +Query a defined view and output results. + +```bash +npx _all_docs view query <name> [options] +``` + +**Options:** +- `--limit <n>` - Maximum records to return +- `--filter <expr>` - Filter expression +- `--count` - Only output the count of matching records + +### view join + +Join two views on a common key. + +```bash +npx _all_docs view join <left> <right> [options] +``` + +This enables comparing packages across different sources: + +```bash +# Compare npm cache against local snapshot +npx _all_docs view define npm --origin npm +npx _all_docs view define snapshot --origin ./snapshot/ +npx _all_docs view join npm snapshot --diff --select 'name' +``` + +### view list + +List all defined views. + +### view show + +Show details of a defined view. + +### view delete + +Delete a defined view. 
/**
 * Local directory storage driver - reads packument JSON files from a directory
 *
 * This is a read-only storage driver that allows mounting existing directories
 * of packument JSON files as a virtual cache. Useful for analyzing local datasets
 * without importing them into the cache.
 */
import { readdir, readFile, access } from 'node:fs/promises';
import { isAbsolute, join, relative, resolve, sep } from 'node:path';
import { existsSync } from 'node:fs';
import { fileURLToPath } from 'node:url';

// URL schemes are case-insensitive, so `FILE://` must be recognised too.
const FILE_URL_PREFIX = /^file:\/\//i;
// `./`, `../` and their backslash spellings.
const EXPLICIT_RELATIVE = /^\.{1,2}[\\/]/;
// Windows drive-letter paths: `C:\...` or `D:/...`.
const WINDOWS_DRIVE = /^[A-Za-z]:[\\/]/;

/**
 * Check if an origin string represents a local path.
 *
 * Recognised forms: `file://` URLs, absolute Unix paths, explicit relative
 * paths (`./`, `../`), and Windows drive paths. Anything else is treated as
 * local only if it exists on disk, so a bare name such as `snapshot` depends
 * on the current working directory.
 *
 * @param {string} origin - Origin string (URL, encoded origin, or path)
 * @returns {boolean} True if origin is a local path; false for empty or
 *   non-string input
 */
export function isLocalPath(origin) {
  // Non-strings used to crash on `.startsWith`; they are simply not paths.
  if (typeof origin !== 'string' || origin === '') return false;

  if (FILE_URL_PREFIX.test(origin)) return true;
  if (origin.startsWith('/')) return true;
  if (EXPLICIT_RELATIVE.test(origin)) return true;
  if (WINDOWS_DRIVE.test(origin)) return true;

  // Fallback: a bare relative name that happens to exist on disk.
  return existsSync(origin);
}

/**
 * Normalize origin to a filesystem path.
 *
 * `file://` URLs are decoded with `fileURLToPath`, which handles
 * percent-encoding and Windows drive letters. URLs it rejects (for example
 * `file://./relative`) fall back to plain removal of the scheme prefix.
 *
 * @param {string} origin - Origin string
 * @returns {string} Filesystem path
 */
function normalizePath(origin) {
  if (!FILE_URL_PREFIX.test(origin)) return origin;

  try {
    return fileURLToPath(origin);
  } catch {
    return origin.replace(FILE_URL_PREFIX, '');
  }
}

/**
 * Map a storage key to the file that backs it.
 *
 * Appends `.json` when the key lacks it and refuses keys that would resolve
 * outside the mounted directory (for example `../other`).
 *
 * @param {string} dirPath - Mounted directory
 * @param {string} key - Storage key (filename, with or without `.json`)
 * @returns {string|null} Path under `dirPath`, or null if the key escapes it
 */
function resolveKeyPath(dirPath, key) {
  const filename = key.endsWith('.json') ? key : `${key}.json`;
  const filePath = join(dirPath, filename);

  // Compare absolute forms so a relative mount like `./data/` works as well.
  const offset = relative(resolve(dirPath), resolve(filePath));
  const escapesMount =
    offset === '..' || offset.startsWith(`..${sep}`) || isAbsolute(offset);

  return escapesMount ? null : filePath;
}

/**
 * Read-only storage driver for local directories of JSON files
 */
export class LocalDirStorageDriver {
  /**
   * @param {string} dirPath - Path (or file:// URL) of the directory
   *   containing packument JSON files
   * @throws {TypeError} If dirPath is not a string
   */
  constructor(dirPath) {
    if (typeof dirPath !== 'string') {
      throw new TypeError('LocalDirStorageDriver requires a directory path string');
    }
    this.dirPath = normalizePath(dirPath);
    this.supportsBatch = false;
    this.supportsBloom = false;
  }

  /**
   * Get a packument by key (filename)
   * @param {string} key - Filename (with or without .json extension)
   * @returns {Promise<Object>} Parsed JSON content
   * @throws {Error} `Key not found: <key>` if the file is missing or the key
   *   points outside the mounted directory
   * @throws {SyntaxError} If the file is not valid JSON
   */
  async get(key) {
    const filePath = resolveKeyPath(this.dirPath, key);
    if (filePath === null) {
      throw new Error(`Key not found: ${key}`);
    }

    let content;
    try {
      content = await readFile(filePath, 'utf8');
    } catch (error) {
      if (error.code === 'ENOENT') {
        throw new Error(`Key not found: ${key}`, { cause: error });
      }
      throw error;
    }

    try {
      return JSON.parse(content);
    } catch (error) {
      // Name the offending file; the parser's own message is kept verbatim.
      throw new SyntaxError(`Invalid JSON in ${filePath}: ${error.message}`, { cause: error });
    }
  }

  /**
   * Check if a key exists
   * @param {string} key - Filename (with or without .json extension)
   * @returns {Promise<boolean>} True if the backing file is accessible
   */
  async has(key) {
    const filePath = resolveKeyPath(this.dirPath, key);
    if (filePath === null) return false;

    try {
      await access(filePath);
      return true;
    } catch {
      return false;
    }
  }

  /**
   * List all JSON files in the directory
   * Note: prefix is ignored for local directories since the directory path
   * itself serves as the namespace isolation.
   * @param {string} [_prefix] - Ignored for local directories
   * @yields {string} Filenames ending in `.json` (directories are skipped)
   */
  async *list(_prefix) {
    const entries = await readdir(this.dirPath, { withFileTypes: true });
    for (const entry of entries) {
      // A directory named `x.json` cannot be read as a packument, so skip it.
      if (entry.name.endsWith('.json') && !entry.isDirectory()) {
        yield entry.name;
      }
    }
  }

  /**
   * Put is not supported - this is a read-only driver
   * @throws {Error} Always throws
   */
  async put(_key, _value) {
    throw new Error('LocalDirStorageDriver is read-only');
  }

  /**
   * Delete is not supported - this is a read-only driver
   * @throws {Error} Always throws
   */
  async delete(_key) {
    throw new Error('LocalDirStorageDriver is read-only');
  }

  /**
   * Clear is not supported - this is a read-only driver
   * @throws {Error} Always throws
   */
  async clear() {
    throw new Error('LocalDirStorageDriver is read-only');
  }

  /**
   * Batch put is not supported - this is a read-only driver
   * @throws {Error} Always throws
   */
  async putBatch(_entries) {
    throw new Error('LocalDirStorageDriver is read-only');
  }

  /**
   * Get metadata info for a file (basic implementation)
   * @param {string} key - Filename (with or without .json extension)
   * @returns {Promise<{key: string, path: string}|null>} The key as given and
   *   the path of the file that backs it, or null if not found
   */
  async info(key) {
    const filePath = resolveKeyPath(this.dirPath, key);
    if (filePath === null || !(await this.has(key))) return null;
    return { key, path: filePath };
  }
}
Storage driver instance */ export async function createStorageDriver(env) { + // Local directory takes precedence - it's a read-only mount + if (env?.LOCAL_DIR) { + return new LocalDirStorageDriver(env.LOCAL_DIR); + } + const runtime = env?.RUNTIME || 'node'; switch (runtime) { diff --git a/src/cache/test/local-dir-driver.test.js b/src/cache/test/local-dir-driver.test.js new file mode 100644 index 0000000..18501fd --- /dev/null +++ b/src/cache/test/local-dir-driver.test.js @@ -0,0 +1,216 @@ +import { describe, it, beforeEach, afterEach } from 'node:test'; +import assert from 'node:assert'; +import { mkdtemp, writeFile, rm, mkdir } from 'node:fs/promises'; +import { join } from 'node:path'; +import { tmpdir } from 'node:os'; +import { LocalDirStorageDriver, isLocalPath } from '../local-dir-driver.js'; + +describe('LocalDirStorageDriver', () => { + let tempDir; + let driver; + + beforeEach(async () => { + tempDir = await mkdtemp(join(tmpdir(), 'local-dir-driver-test-')); + driver = new LocalDirStorageDriver(tempDir); + }); + + afterEach(async () => { + await rm(tempDir, { recursive: true, force: true }); + }); + + describe('list()', () => { + it('should list JSON files in directory', async () => { + await writeFile(join(tempDir, 'lodash.json'), JSON.stringify({ name: 'lodash' })); + await writeFile(join(tempDir, 'react.json'), JSON.stringify({ name: 'react' })); + await writeFile(join(tempDir, 'readme.md'), 'not json'); + + const files = []; + for await (const file of driver.list()) { + files.push(file); + } + + assert.strictEqual(files.length, 2); + assert.ok(files.includes('lodash.json')); + assert.ok(files.includes('react.json')); + assert.ok(!files.includes('readme.md')); + }); + + it('should ignore prefix parameter', async () => { + await writeFile(join(tempDir, 'lodash.json'), JSON.stringify({ name: 'lodash' })); + + const files = []; + for await (const file of driver.list('v1:packument:npm:')) { + files.push(file); + } + + assert.strictEqual(files.length, 1); 
+ assert.strictEqual(files[0], 'lodash.json'); + }); + + it('should handle empty directory', async () => { + const files = []; + for await (const file of driver.list()) { + files.push(file); + } + + assert.strictEqual(files.length, 0); + }); + }); + + describe('get()', () => { + it('should read and parse JSON file', async () => { + const packument = { name: 'lodash', version: '4.17.21' }; + await writeFile(join(tempDir, 'lodash.json'), JSON.stringify(packument)); + + const result = await driver.get('lodash.json'); + assert.deepStrictEqual(result, packument); + }); + + it('should add .json extension if missing', async () => { + const packument = { name: 'lodash', version: '4.17.21' }; + await writeFile(join(tempDir, 'lodash.json'), JSON.stringify(packument)); + + const result = await driver.get('lodash'); + assert.deepStrictEqual(result, packument); + }); + + it('should throw on missing file', async () => { + await assert.rejects( + driver.get('nonexistent.json'), + /Key not found: nonexistent\.json/ + ); + }); + + it('should throw on invalid JSON', async () => { + await writeFile(join(tempDir, 'invalid.json'), 'not valid json'); + + await assert.rejects( + driver.get('invalid.json'), + /Unexpected token/ + ); + }); + }); + + describe('has()', () => { + it('should return true for existing file', async () => { + await writeFile(join(tempDir, 'lodash.json'), JSON.stringify({ name: 'lodash' })); + + const exists = await driver.has('lodash.json'); + assert.strictEqual(exists, true); + }); + + it('should return true with implicit .json extension', async () => { + await writeFile(join(tempDir, 'lodash.json'), JSON.stringify({ name: 'lodash' })); + + const exists = await driver.has('lodash'); + assert.strictEqual(exists, true); + }); + + it('should return false for missing file', async () => { + const exists = await driver.has('nonexistent.json'); + assert.strictEqual(exists, false); + }); + }); + + describe('read-only methods', () => { + it('should throw on put()', async 
() => { + await assert.rejects( + driver.put('key', { value: 'data' }), + /LocalDirStorageDriver is read-only/ + ); + }); + + it('should throw on delete()', async () => { + await assert.rejects( + driver.delete('key'), + /LocalDirStorageDriver is read-only/ + ); + }); + + it('should throw on clear()', async () => { + await assert.rejects( + driver.clear(), + /LocalDirStorageDriver is read-only/ + ); + }); + + it('should throw on putBatch()', async () => { + await assert.rejects( + driver.putBatch([{ key: 'k', value: 'v' }]), + /LocalDirStorageDriver is read-only/ + ); + }); + }); + + describe('info()', () => { + it('should return info for existing file', async () => { + await writeFile(join(tempDir, 'lodash.json'), JSON.stringify({ name: 'lodash' })); + + const info = await driver.info('lodash.json'); + assert.ok(info); + assert.strictEqual(info.key, 'lodash.json'); + assert.ok(info.path.includes('lodash.json')); + }); + + it('should return null for missing file', async () => { + const info = await driver.info('nonexistent.json'); + assert.strictEqual(info, null); + }); + }); + + describe('file:// URL support', () => { + it('should handle file:// URL paths', async () => { + const fileUrlPath = `file://${tempDir}`; + const urlDriver = new LocalDirStorageDriver(fileUrlPath); + + await writeFile(join(tempDir, 'test.json'), JSON.stringify({ name: 'test' })); + + const files = []; + for await (const file of urlDriver.list()) { + files.push(file); + } + + assert.strictEqual(files.length, 1); + assert.strictEqual(files[0], 'test.json'); + }); + }); +}); + +describe('isLocalPath', () => { + it('should detect file:// URLs', () => { + assert.strictEqual(isLocalPath('file:///path/to/dir'), true); + }); + + it('should detect absolute Unix paths', () => { + assert.strictEqual(isLocalPath('/path/to/dir'), true); + }); + + it('should detect relative paths with ./', () => { + assert.strictEqual(isLocalPath('./path/to/dir'), true); + }); + + it('should detect relative paths with 
../', () => { + assert.strictEqual(isLocalPath('../path/to/dir'), true); + }); + + it('should detect Windows absolute paths', () => { + assert.strictEqual(isLocalPath('C:\\path\\to\\dir'), true); + assert.strictEqual(isLocalPath('D:/path/to/dir'), true); + }); + + it('should reject registry URLs', () => { + assert.strictEqual(isLocalPath('https://registry.npmjs.org'), false); + assert.strictEqual(isLocalPath('http://localhost:4873'), false); + }); + + it('should reject encoded origins', () => { + assert.strictEqual(isLocalPath('npm'), false); + assert.strictEqual(isLocalPath('registry.npmjs.org'), false); + }); + + it('should handle null/undefined', () => { + assert.strictEqual(isLocalPath(null), false); + assert.strictEqual(isLocalPath(undefined), false); + assert.strictEqual(isLocalPath(''), false); + }); +}); diff --git a/src/view/index.js b/src/view/index.js index c306711..d45e197 100644 --- a/src/view/index.js +++ b/src/view/index.js @@ -1,5 +1,8 @@ /** * View module - predicate + projection over cached data + * + * For local directory support, use createStorageDriver({ LOCAL_DIR: path }) + * from @_all_docs/cache when creating the cache instance. */ export { View } from './view.js'; export { ViewStore } from './store.js'; diff --git a/src/view/query.js b/src/view/query.js index 441b253..1b67bbc 100644 --- a/src/view/query.js +++ b/src/view/query.js @@ -1,20 +1,25 @@ /** * Query execution for views + * + * The cache instance should be configured with the appropriate storage driver: + * - For registry origins: Use createStorageDriver({ CACHE_DIR: ... }) + * - For local directories: Use createStorageDriver({ LOCAL_DIR: ... 
}) */ import { createProjection, createFilter } from './projection.js'; /** * Query a view, yielding projected records * @param {View} view - The view to query - * @param {Cache} cache - The cache instance + * @param {Cache} cache - The cache instance configured with appropriate driver * @param {Object} options - Query options * @param {number} [options.limit] - Maximum records to return * @param {string} [options.where] - Additional filter expression * @param {boolean} [options.progress] - Show progress on stderr + * @param {string} [options.keyPrefix] - Override key prefix (defaults to view.getCacheKeyPrefix()) * @yields {Object} Projected records */ export async function* queryView(view, cache, options = {}) { - const { limit, where, progress = false } = options; + const { limit, where, progress = false, keyPrefix } = options; // Compile projection from view's select const project = createProjection({ select: view.select }); @@ -22,7 +27,9 @@ export async function* queryView(view, cache, options = {}) { // Compile additional filter if provided const filter = createFilter({ where }); - const prefix = view.getCacheKeyPrefix(); + // Get key prefix - for local dirs this will be ignored by the driver + const prefix = keyPrefix ?? view.getCacheKeyPrefix(); + let count = 0; let yielded = 0; @@ -41,7 +48,7 @@ export async function* queryView(view, cache, options = {}) { const entry = await cache.fetch(key); if (!entry) continue; - // Cache entries wrap the response - packument is in body + // Extract the packument body from cache entry const value = entry.body || entry; // Apply view's projection