Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions cli/cli/src/cmd/view/query.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { ViewStore, queryView, countView, collectView } from '@_all_docs/view';
import { Cache, createStorageDriver } from '@_all_docs/cache';
import { Cache, createStorageDriver, isLocalPath } from '@_all_docs/cache';

export const usage = `Usage: _all_docs view query <name> [options]

Expand Down Expand Up @@ -43,8 +43,12 @@ export const command = async (cli) => {
process.exit(1);
}

const driver = await createStorageDriver({ CACHE_DIR: cli.dir('packuments') });
const cache = new Cache({ path: cli.dir('packuments'), driver });
// Create appropriate storage driver based on view's origin
const origin = view.registry || view.origin;
const driver = isLocalPath(origin)
? await createStorageDriver({ LOCAL_DIR: origin })
: await createStorageDriver({ CACHE_DIR: cli.dir('packuments') });
const cache = new Cache({ path: origin, driver });

const options = {
limit: cli.values.limit ? parseInt(cli.values.limit, 10) : undefined,
Expand Down
89 changes: 89 additions & 0 deletions doc/cli-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -616,6 +616,95 @@ npx _all_docs cache create-index > previous-index.txt

---

## view

Commands for defining and querying views over cached data.

Views can be created over two types of origins:
- **Registry cache**: Data fetched from npm or other registries
- **Local directory**: JSON packument files in a directory

### view define

Define a named view for querying packuments.

```bash
npx _all_docs view define <name> [options]
```

**Options:**
- `--origin <origin>` - Data origin: encoded name (npm), URL, or local path
- `--registry <url>` - Registry URL (alternative to origin)
- `--select <expr>` - Field selection expression
- `--type <type>` - Entity type: packument, partition (default: packument)
- `--force`, `-f` - Overwrite existing view definition

**Origin Types:**

| Type | Example | Description |
|------|---------|-------------|
| Encoded name | `npm` | Pre-defined registry origin |
| Registry URL | `https://npm.example.com` | Custom registry |
| Local path | `./local-data/` | Directory of JSON files |
| file:// URL | `file:///data/archive/` | Explicit file URL |

**Examples:**

```bash
# Define view over npm registry cache
npx _all_docs view define npm-pkgs --origin npm

# Define view over local directory of packuments
npx _all_docs view define local-snapshot --origin ./local-packuments/

# Using file:// URL for local directory
npx _all_docs view define archive --origin file:///data/npm-archive/
```

### view query

Query a defined view and output results.

```bash
npx _all_docs view query <name> [options]
```

**Options:**
- `--limit <n>` - Maximum records to return
- `--filter <expr>` - Filter expression
- `--count` - Only output the count of matching records

### view join

Join two views on a common key.

```bash
npx _all_docs view join <left> <right> [options]
```

This enables comparing packages across different sources:

```bash
# Compare npm cache against local snapshot
npx _all_docs view define npm --origin npm
npx _all_docs view define snapshot --origin ./snapshot/
npx _all_docs view join npm snapshot --diff --select 'name'
```

### view list

List all defined views.

### view show

Show details of a defined view.

### view delete

Delete a defined view.

---

## Troubleshooting

### Common Issues
Expand Down
2 changes: 1 addition & 1 deletion src/cache/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ export { BaseHTTPClient, createAgent, createDispatcher } from './http.js';
export { CacheEntry } from './entry.js';
export { createCacheKey, decodeCacheKey, createPartitionKey, createPackumentKey, encodeOrigin } from './cache-key.js';
export { PartitionCheckpoint } from './checkpoint.js';
export { createStorageDriver } from './storage-driver.js';
export { createStorageDriver, LocalDirStorageDriver, isLocalPath } from './storage-driver.js';
export { AuthError, TempError, PermError, categorizeHttpError } from './errors.js';
159 changes: 159 additions & 0 deletions src/cache/local-dir-driver.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
/**
* Local directory storage driver - reads packument JSON files from a directory
*
* This is a read-only storage driver that allows mounting existing directories
* of packument JSON files as a virtual cache. Useful for analyzing local datasets
* without importing them into the cache.
*/
import { existsSync } from 'node:fs';
import { readdir, readFile, access } from 'node:fs/promises';
import { join } from 'node:path';
import { fileURLToPath } from 'node:url';

/**
 * Determine whether an origin string refers to the local filesystem rather
 * than to a registry.
 *
 * An origin is treated as local when it is a `file://` URL, a Unix absolute
 * path, a `./` or `../` relative path, a Windows drive path (`C:\` or `C:/`),
 * or, as a last resort, any string that names an existing filesystem entry.
 *
 * @param {string} origin - Origin string (URL, encoded origin, or path)
 * @returns {boolean} True if origin is a local path; false for empty,
 *   nullish, or non-string input
 */
export function isLocalPath(origin) {
  // Only non-empty strings can describe a path. Guarding on the type keeps this
  // predicate from throwing on `origin.startsWith` when handed bad input.
  if (typeof origin !== 'string' || origin === '') return false;

  const hasPathPrefix =
    origin.startsWith('file://') || // explicit file URL
    origin.startsWith('/') || // Unix absolute path
    origin.startsWith('./') || // relative to the current directory
    origin.startsWith('../') || // relative to the parent directory
    /^[A-Za-z]:[\\/]/.test(origin); // Windows drive path (C:\, D:/, ...)

  // Fallback check: a bare name such as `data` still counts as local when it
  // exists on disk (resolved against the current working directory).
  return hasPathPrefix || existsSync(origin);
}

/**
 * Normalize an origin to a filesystem path.
 *
 * Origins that do not start with `file://` are returned unchanged. `file://`
 * URLs are converted with `fileURLToPath`, so percent-escapes are decoded
 * (`my%20dir` becomes `my dir`) and Windows drive URLs map to drive paths.
 *
 * @param {string} origin - Origin string
 * @returns {string} Filesystem path
 */
function normalizePath(origin) {
  if (!origin.startsWith('file://')) {
    return origin;
  }

  try {
    return fileURLToPath(origin);
  } catch {
    // Inputs Node rejects as file URLs (e.g. `file://relative/dir`, which
    // parses with a host component) keep the previous behavior of simply
    // dropping the scheme prefix.
    return origin.replace('file://', '');
  }
}

/**
 * Map a storage key to the path of its JSON file inside a directory.
 * Keys may be given with or without the `.json` extension.
 * @param {string} dirPath - Directory containing the JSON files
 * @param {string} key - Filename (with or without .json extension)
 * @returns {string} Path of the file backing the key
 */
function resolveKeyPath(dirPath, key) {
  const filename = key.endsWith('.json') ? key : `${key}.json`;
  return join(dirPath, filename);
}

/**
 * Read-only storage driver for local directories of JSON files.
 *
 * Every key maps to `<dirPath>/<key>.json`. All mutating operations
 * (`put`, `delete`, `clear`, `putBatch`) reject with an error.
 */
export class LocalDirStorageDriver {
  /**
   * @param {string} dirPath - Path to directory containing packument JSON files
   *   (passed through `normalizePath`, so `file://` origins are accepted)
   */
  constructor(dirPath) {
    this.dirPath = normalizePath(dirPath);
    this.supportsBatch = false;
    this.supportsBloom = false;
  }

  /**
   * Get a packument by key (filename)
   * @param {string} key - Filename (with or without .json extension)
   * @returns {Promise<object>} Parsed JSON content
   * @throws {Error} `Key not found: <key>` if the file does not exist; read
   *   errors other than ENOENT and JSON syntax errors are rethrown unchanged
   */
  async get(key) {
    const filePath = resolveKeyPath(this.dirPath, key);

    try {
      const content = await readFile(filePath, 'utf8');
      return JSON.parse(content);
    } catch (error) {
      if (error.code === 'ENOENT') {
        // Keep the underlying fs error reachable for debugging.
        throw new Error(`Key not found: ${key}`, { cause: error });
      }
      throw error;
    }
  }

  /**
   * Check if a key exists
   * @param {string} key - Filename (with or without .json extension)
   * @returns {Promise<boolean>} True when the backing file is accessible
   */
  async has(key) {
    try {
      await access(resolveKeyPath(this.dirPath, key));
      return true;
    } catch {
      // Any access failure (missing file, permissions, ...) means "not available".
      return false;
    }
  }

  /**
   * List all JSON files in the directory
   * Note: prefix is ignored for local directories since the directory path
   * itself serves as the namespace isolation.
   * @param {string} [_prefix] - Ignored for local directories
   * @yields {string} Filenames ending in `.json`
   */
  async *list(_prefix) {
    const files = await readdir(this.dirPath);
    for (const file of files) {
      if (file.endsWith('.json')) {
        yield file;
      }
    }
  }

  /**
   * Put is not supported - this is a read-only driver
   * @throws {Error} Always throws
   */
  async put(_key, _value) {
    throw new Error('LocalDirStorageDriver is read-only');
  }

  /**
   * Delete is not supported - this is a read-only driver
   * @throws {Error} Always throws
   */
  async delete(_key) {
    throw new Error('LocalDirStorageDriver is read-only');
  }

  /**
   * Clear is not supported - this is a read-only driver
   * @throws {Error} Always throws
   */
  async clear() {
    throw new Error('LocalDirStorageDriver is read-only');
  }

  /**
   * Batch put is not supported - this is a read-only driver
   * @throws {Error} Always throws
   */
  async putBatch(_entries) {
    throw new Error('LocalDirStorageDriver is read-only');
  }

  /**
   * Get metadata info for a file (basic implementation)
   * @param {string} key - Filename (with or without .json extension)
   * @returns {Promise<object|null>} `{ key, path }` or null if not found
   */
  async info(key) {
    const exists = await this.has(key);
    if (!exists) return null;
    // Report the same file that has()/get() resolve, including the `.json`
    // extension when the key was given without it.
    return { key, path: resolveKeyPath(this.dirPath, key) };
  }
}
12 changes: 12 additions & 0 deletions src/cache/storage-driver.js
Original file line number Diff line number Diff line change
@@ -1,9 +1,21 @@
import { LocalDirStorageDriver, isLocalPath } from './local-dir-driver.js';

export { LocalDirStorageDriver, isLocalPath };

/**
* Creates a storage driver based on the runtime environment
* @param {Object} env - Environment configuration
* @param {string} [env.LOCAL_DIR] - Local directory path (read-only driver)
* @param {string} [env.CACHE_DIR] - Cache directory path
* @param {string} [env.RUNTIME] - Runtime environment (node, cloudflare, fastly, cloudrun)
* @returns {Object} Storage driver instance
*/
export async function createStorageDriver(env) {
// Local directory takes precedence - it's a read-only mount
if (env?.LOCAL_DIR) {
return new LocalDirStorageDriver(env.LOCAL_DIR);
}

const runtime = env?.RUNTIME || 'node';

switch (runtime) {
Expand Down
Loading