From 502c92d626f44fba4a625211273013a62f32acdc Mon Sep 17 00:00:00 2001 From: James Ross Date: Fri, 27 Feb 2026 14:02:24 -0800 Subject: [PATCH 1/3] feat!: ObservabilityPort, streaming restore, parallel I/O (M14 Conduit) BREAKING CHANGE: CasService no longer extends EventEmitter. The new ObservabilityPort replaces all emit() calls with metric(channel, data). EventEmitterObserver provides backward-compatible event bridging. - Add ObservabilityPort, SilentObserver, EventEmitterObserver, StatsCollector - Add restoreStream() async generator with O(chunkSize) streaming - Add concurrency option with Semaphore-gated parallel chunk I/O - Rewrite restoreFile() to use createWriteStream + pipeline - Migrate CLI progress tracking to EventEmitterObserver - 567 tests passing, eslint clean, JSR validated --- CHANGELOG.md | 23 ++ README.md | 10 +- ROADMAP.md | 272 +++++++++++++++++- bin/git-cas.js | 22 +- examples/progress-tracking.js | 50 ++-- index.d.ts | 42 ++- index.js | 51 +++- jsr.json | 2 +- package.json | 2 +- src/domain/services/CasService.d.ts | 20 +- src/domain/services/CasService.js | 195 ++++++++++--- src/domain/services/Semaphore.js | 44 +++ .../adapters/EventEmitterObserver.js | 70 +++++ src/infrastructure/adapters/SilentObserver.js | 11 + src/infrastructure/adapters/StatsCollector.js | 44 +++ src/ports/ObservabilityPort.js | 33 +++ test/unit/cli/progress.test.js | 20 +- .../domain/services/CasService.codec.test.js | 5 +- .../services/CasService.compression.test.js | 2 + .../domain/services/CasService.crypto.test.js | 8 + .../services/CasService.deleteAsset.test.js | 2 + .../services/CasService.empty-file.test.js | 2 + .../domain/services/CasService.errors.test.js | 12 +- .../domain/services/CasService.events.test.js | 87 ++++-- .../CasService.findOrphanedChunks.test.js | 2 + .../domain/services/CasService.kdf.test.js | 2 + .../CasService.key-validation.test.js | 2 + .../domain/services/CasService.merkle.test.js | 2 + .../services/CasService.parallel.test.js | 116 
++++++++ .../services/CasService.readManifest.test.js | 2 + .../services/CasService.restore.test.js | 2 + .../services/CasService.restoreStream.test.js | 155 ++++++++++ .../services/CasService.stream-error.test.js | 2 + test/unit/domain/services/CasService.test.js | 2 + test/unit/domain/services/Semaphore.test.js | 58 ++++ .../adapters/EventEmitterObserver.test.js | 73 +++++ .../adapters/SilentObserver.test.js | 21 ++ .../adapters/StatsCollector.test.js | 51 ++++ test/unit/ports/ObservabilityPort.test.js | 19 ++ 39 files changed, 1393 insertions(+), 145 deletions(-) create mode 100644 src/domain/services/Semaphore.js create mode 100644 src/infrastructure/adapters/EventEmitterObserver.js create mode 100644 src/infrastructure/adapters/SilentObserver.js create mode 100644 src/infrastructure/adapters/StatsCollector.js create mode 100644 src/ports/ObservabilityPort.js create mode 100644 test/unit/domain/services/CasService.parallel.test.js create mode 100644 test/unit/domain/services/CasService.restoreStream.test.js create mode 100644 test/unit/domain/services/Semaphore.test.js create mode 100644 test/unit/infrastructure/adapters/EventEmitterObserver.test.js create mode 100644 test/unit/infrastructure/adapters/SilentObserver.test.js create mode 100644 test/unit/infrastructure/adapters/StatsCollector.test.js create mode 100644 test/unit/ports/ObservabilityPort.test.js diff --git a/CHANGELOG.md b/CHANGELOG.md index fb74640d..7291b151 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,29 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [4.0.0] — Conduit (2026-02-27) + +### Breaking Changes +- **`CasService` no longer extends `EventEmitter`** — event subscriptions must use the new `ObservabilityPort` adapters instead of `service.on()`. 
The `EventEmitterObserver` adapter provides full backward compatibility for existing event-based code. +- **`observability` is a required constructor port** for `CasService`. The facade (`ContentAddressableStore`) defaults to `SilentObserver` when omitted. + +### Added +- **ObservabilityPort** — new hexagonal port (`src/ports/ObservabilityPort.js`) with `metric(channel, data)`, `log(level, msg, meta?)`, and `span(name)` methods. Decouples the domain layer from Node's event infrastructure. +- **SilentObserver** — no-op adapter (default). Zero overhead when observability is not needed. +- **EventEmitterObserver** — bridges `metric()` calls to EventEmitter events (`chunk:stored`, `file:restored`, etc.) for backward-compatible progress tracking. Exposes `.on()`, `.removeListener()`, `.listenerCount()`. +- **StatsCollector** — accumulates metrics and exposes `summary()` with `chunksProcessed`, `bytesTotal`, `elapsed`, `throughput`, and `errors`. +- **`restoreStream()`** — new async generator on `CasService` and facade. Returns `AsyncIterable` for streaming restore with O(chunkSize) memory for unencrypted, uncompressed files. Encrypted/compressed files buffer internally but expose the same streaming API. +- **`restoreFile()` now uses streaming I/O** — writes via `createWriteStream` + `pipeline` instead of buffering the entire file with `writeFileSync`. +- **Parallel chunk I/O** — new `concurrency` option (default: 1). Store operations launch chunk writes through a counting semaphore. Streaming restore uses read-ahead for concurrent blob fetches. `concurrency: 1` produces identical sequential behavior. +- **Semaphore** — internal counting semaphore (`src/domain/services/Semaphore.js`) for concurrency control. +- 43 new unit tests (567 total). + +### Changed +- CLI `store` and `restore` commands now create an `EventEmitterObserver` and pass it to the CAS instance, attaching progress tracking to the observer instead of the service. 
+- `restore()` reimplemented as a collector over `restoreStream()`. +- `_chunkAndStore()` refactored to use semaphore-gated parallel writes with `Promise.all`, sorting results by index after completion. +- Progress tracking example (`examples/progress-tracking.js`) updated to use `EventEmitterObserver` pattern. + ## [3.1.0] — Bijou (2026-02-27) ### Added diff --git a/README.md b/README.md index 166a2b4f..f7402214 100644 --- a/README.md +++ b/README.md @@ -44,15 +44,13 @@ We use the object database. See [CHANGELOG.md](./CHANGELOG.md) for the full list of changes. -## What's new in v3.1.0 +## What's new in v4.0.0 -**Interactive vault dashboard** — `git cas vault dashboard` launches a full TUI with split-pane layout, manifest detail view, keyboard navigation, and real-time filtering. Browse entries, inspect chunks, and view encryption status without memorizing CLI flags. +**ObservabilityPort** — `CasService` no longer extends `EventEmitter`. A new hexagonal `ObservabilityPort` decouples the domain from Node's event infrastructure. Three adapters ship out of the box: `SilentObserver` (no-op default), `EventEmitterObserver` (backward-compatible event bridge), and `StatsCollector` (metric accumulator). -**Progress bars** — `store` and `restore` now show animated progress with throughput reporting. Suppress with `--quiet`. +**Streaming restore** — `restoreStream()` returns an `AsyncIterable` with O(chunkSize) memory for unencrypted files. `restoreFile()` now writes via `createWriteStream` + `pipeline` instead of buffering. -**Pretty history** — `git cas vault history --pretty` renders a color-coded, paginated timeline of vault commits. - -**Inspect command** — `git cas inspect <oid>` renders manifest details with chunk table, encryption info, and compression badges. +**Parallel chunk I/O** — new `concurrency` option gates store writes and restore reads through a counting semaphore. `concurrency: 4` can significantly speed up large-file operations. 
See [CHANGELOG.md](./CHANGELOG.md) for the full list of changes. diff --git a/ROADMAP.md b/ROADMAP.md index 470c18e5..98b87d27 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -185,6 +185,7 @@ Return and throw semantics for every public method (current and planned). | Version | Milestone | Codename | Theme | Status | |--------:|-----------|----------|-------|--------| +| v4.0.0 | M14 | Conduit | Streaming I/O, observability, parallel chunks | | | v2.1.0 | M8 | Spit Shine | Review fixups | | | v2.2.0 | M9 | Cockpit | CLI improvements | | | v3.0.0 | M10 | Hydra | Content-defined chunking | | @@ -201,15 +202,19 @@ M7 Horizon (v2.0.0) ✅ ────────────────── │ │ ├──────┬──────────┐ │ v v v v -M8 Spit M9 Cockpit M10 Hydra (v3.0.0) M11 Locksmith (v3.1.0) +M8 Spit M9 Cockpit M10 Hydra M11 Locksmith Shine (v2.2.0) │ │ (v2.1.0) │ │ v - │ v M12 Carousel (v3.2.0) + │ v M12 Carousel │ (CDC benchmarks) │ v M13 Bijou (v3.1.0) ✅ (TUI dashboard & progress) + │ + v + M14 Conduit (v4.0.0) ◀── NEXT + (Streaming I/O + Observability + Parallel chunks) ``` --- @@ -220,13 +225,272 @@ Shine (v2.2.0) │ │ | # | Codename | Theme | Version | Tasks | ~LoC | ~Hours | |---:|--------------|----------------------------|:-------:|------:|-------:|------:| +| M14| Conduit | Streaming I/O, observability, parallel chunks | v4.0.0 | 4 | ~600 | ~18h | | M8 | Spit Shine | Review fixups | v2.1.0 | 3 | ~290 | ~7h | | M9 | Cockpit | CLI improvements | v2.2.0 | 5 | ~260 | ~7h | | M10| Hydra | Content-defined chunking | v3.0.0 | 4 | ~690 | ~22h | | M11| Locksmith | Multi-recipient encryption | v3.1.0 | 4 | ~580 | ~20h | | M12| Carousel | Key rotation | v3.2.0 | 4 | ~400 | ~13h | | M13| Bijou | TUI dashboard & progress | v3.1.0 | 6 | ~650 | ~20h | -| | **Total** | | | **26**| **~2,870** | **~89h** | +| | **Total** | | | **30**| **~3,470** | **~107h** | + +--- + +# M14 — Conduit (v4.0.0) +**Theme:** Replace `EventEmitter` inheritance with a proper `ObservabilityPort`, add streaming restore, and enable parallel chunk 
I/O. Major version bump: removes `extends EventEmitter` from `CasService`, adds `observability` as a required constructor port. + +--- + +## Task 14.1: ObservabilityPort and adapters + +**User Story** +As a library consumer, I want structured observability (metrics, logs, spans) from CAS operations so I can monitor throughput, track errors, and integrate with my own tooling — without the domain layer depending on Node's EventEmitter. + +**Requirements** +- R1: Define `ObservabilityPort` interface with three methods: + - `metric(channel: string, data: object)` — emit a named metric (channels: `chunk`, `file`, `integrity`, `vault`). + - `log(level: string, message: string, meta?: object)` — structured log (`debug`, `info`, `warn`, `error`). + - `span(name: string) → { end(meta?: object): void }` — timed operation bracket. +- R2: Remove `extends EventEmitter` from `CasService`. All `this.emit()` calls replaced with `this.observability.metric()` or `this.observability.log()`. +- R3: `observability` becomes a required constructor parameter on `CasService` (like `persistence`, `codec`, `crypto`). +- R4: Implement `SilentObserver` adapter (no-op — all methods are empty). This is the default when no observability is needed. +- R5: Implement `EventEmitterObserver` adapter that translates `metric()` calls to `EventEmitter.emit()` calls for backward compatibility. Consumers who relied on `service.on('chunk:stored', ...)` can wrap with this adapter. +- R6: Implement `StatsCollector` adapter that accumulates metrics and exposes a summary object: `{ chunksProcessed, bytesTotal, elapsed, throughput, errors }`. +- R7: Facade (`ContentAddressableStore`) creates a default `SilentObserver` if no observability adapter is provided, and passes it to `CasService`. +- R8: Update `.d.ts` declarations for new port and adapters. + +**Acceptance Criteria** +- AC1: `CasService` no longer extends `EventEmitter`. +- AC2: All existing event emission points emit metrics via `ObservabilityPort`. 
+- AC3: `EventEmitterObserver` adapter produces identical events to the old `extends EventEmitter` behavior. +- AC4: `StatsCollector` accumulates correct stats across a full store+restore cycle. +- AC5: `SilentObserver` introduces zero overhead (no-op methods). +- AC6: Span `end()` captures elapsed time in the metric. + +**Scope** +- In scope: Port definition, 3 adapters, CasService refactor, facade wiring, TypeScript declarations. +- Out of scope: TUI adapter (M13 already has its own bijou integration — it can wrap `EventEmitterObserver` or adopt `ObservabilityPort` in a follow-up). Log levels beyond the 4 basics. Persistent metrics storage. + +**Est. Complexity (LoC)** +- Prod: ~180 (port ~30, 3 adapters ~90, CasService refactor ~40, facade ~20) +- Tests: ~120 +- Total: ~300 + +**Est. Human Working Hours** +- ~8h + +**Test Plan** +- Golden path: + - Store file with `StatsCollector` → verify `chunksProcessed`, `bytesTotal`, `throughput` are correct. + - Store + restore with `EventEmitterObserver` → assert same events as old EventEmitter behavior. + - `SilentObserver` → store + restore completes with no errors, no output. +- Failures: + - Missing `observability` param → constructor throws with descriptive error. + - Corrupted chunk → `observability.log('error', ...)` called before throw. +- Edges: + - 0-byte file → span starts and ends, no chunk metrics emitted. + - Span `end()` called twice → no error (idempotent). +- Fuzz/stress: + - All existing CasService tests must pass with `SilentObserver` injected. + +**Definition of Done** +- DoD1: `CasService` does not extend `EventEmitter`. +- DoD2: `ObservabilityPort` defined with metric/log/span. +- DoD3: 3 adapters implemented and tested. +- DoD4: All existing tests updated and green. +- DoD5: TypeScript declarations updated. 
+ +**Blocking** +- Blocks: Task 14.2, 14.3, 14.4 + +**Blocked By** +- Blocked by: None + +--- + +## Task 14.2: Streaming restore + +**User Story** +As a developer restoring large files, I want a streaming restore path so memory usage is O(chunkSize), not O(fileSize). + +**Requirements** +- R1: Add `CasService.restoreStream({ manifest, encryptionKey, passphrase })` returning `AsyncIterable`. +- R2: Each yielded buffer is one verified, decrypted, decompressed chunk — ready to write. +- R3: Integrity verified per-chunk before yield (not after full reassembly). +- R4: Decompression and decryption applied per-chunk in streaming fashion. +- R5: `restoreFile()` in the facade uses `restoreStream()` internally with `createWriteStream()` instead of `writeFileSync()`. +- R6: Existing `restore()` method reimplemented as: collect `restoreStream()` into buffer. Single code path, two interfaces. +- R7: Emit `observability.metric('chunk', ...)` per chunk and `observability.span('restore')` for the full operation. + +**Acceptance Criteria** +- AC1: `restoreStream()` yields chunks that, when concatenated, match the original file byte-for-byte. +- AC2: Memory usage during streaming restore is O(chunkSize), not O(fileSize). +- AC3: `restoreFile()` writes via `createWriteStream()` — no `writeFileSync()`. +- AC4: Encrypted + compressed files round-trip correctly via streaming restore. +- AC5: Existing `restore()` method returns identical results (backward compat). + +**Scope** +- In scope: `restoreStream()` on CasService + facade, refactor `restoreFile()` and `restore()`. +- Out of scope: Parallel chunk reads (Task 14.3), resume/partial restore. + +**Est. Complexity (LoC)** +- Prod: ~80 +- Tests: ~100 +- Total: ~180 + +**Est. Human Working Hours** +- ~5h + +**Test Plan** +- Golden path: + - Store 10KB → restoreStream → collect → byte-compare original. + - Store encrypted + compressed → restoreStream → collect → compare. 
+ - restoreFile writes correct file via streaming (spy confirms no writeFileSync). +- Failures: + - Corrupted chunk mid-stream → throws INTEGRITY_ERROR, iteration stops. + - Wrong key → throws INTEGRITY_ERROR on first encrypted chunk. +- Edges: + - 0-byte manifest yields empty iterable. + - Single-chunk file yields exactly 1 buffer. + - Exact multiple of chunkSize yields expected count. +- Fuzz/stress: + - 50 random file sizes (seeded) — streaming restore matches buffered restore byte-for-byte. + +**Definition of Done** +- DoD1: `restoreStream()` implemented on CasService and exposed via facade. +- DoD2: `restoreFile()` refactored to use streaming writes. +- DoD3: `restore()` reimplemented on top of `restoreStream()`. +- DoD4: All existing restore tests still pass. +- DoD5: New streaming tests added and green. + +**Blocking** +- Blocks: Task 14.3 + +**Blocked By** +- Blocked by: Task 14.1 (observability wiring) + +--- + +## Task 14.3: Parallel chunk I/O + +**User Story** +As a user storing or restoring files with many chunks, I want the system to read/write multiple chunks concurrently so operations complete faster. + +**Requirements** +- R1: Add `concurrency` option to `CasService` constructor (positive integer, default: 1). +- R2: Store path (`_chunkAndStore`): up to N chunks written to Git in parallel. Chunk ordering in the manifest is preserved regardless of write completion order. +- R3: Restore path (`restoreStream`): up to N chunks read from Git in parallel. Yield order matches manifest chunk order (read ahead, buffer up to N, yield in sequence). +- R4: Implement a simple `Semaphore` utility (internal, not exported) to gate concurrent persistence calls. +- R5: `concurrency: 1` produces identical behavior to current sequential code (no functional change). +- R6: Emit `observability.metric('chunk', ...)` per chunk regardless of parallelism. `observability.span('chunk:read')` / `observability.span('chunk:write')` wrap each individual I/O operation. 
+- R7: Expose `concurrency` option on `ContentAddressableStore` constructor, forwarded to `CasService`. + +**Acceptance Criteria** +- AC1: With `concurrency: 4`, a 20-chunk store completes measurably faster than sequential (benchmark, not unit test). +- AC2: With `concurrency: 4`, restore produces byte-identical output to sequential. +- AC3: With `concurrency: 1`, all existing tests pass unchanged. +- AC4: Manifest chunk order is always preserved regardless of concurrency setting. +- AC5: Semaphore correctly limits concurrent persistence calls. + +**Scope** +- In scope: Semaphore, parallel store loop, parallel restore with ordered yield, concurrency config. +- Out of scope: Adaptive concurrency (auto-tuning), per-operation concurrency overrides, connection pooling in GitPersistenceAdapter. + +**Est. Complexity (LoC)** +- Prod: ~100 (Semaphore ~25, store refactor ~30, restore refactor ~30, config ~15) +- Tests: ~80 +- Total: ~180 + +**Est. Human Working Hours** +- ~6h + +**Test Plan** +- Golden path: + - Store + restore with concurrency: 4, verify byte-for-byte match. + - Store + restore with concurrency: 1, verify identical to current behavior. + - Encrypted + compressed + concurrency: 4 → correct round-trip. +- Failures: + - concurrency: 0 → constructor throws. + - concurrency: -1 → constructor throws. + - One chunk write fails mid-batch → error propagated, partial writes are safe (unreachable blobs GC'd by Git). +- Edges: + - File with 1 chunk + concurrency: 4 → works (no deadlock). + - File with 3 chunks + concurrency: 10 → only 3 in flight. + - 0-byte file + any concurrency → no-op. +- Fuzz/stress: + - Benchmark: 100-chunk file, concurrency 1 vs 4 vs 8, measure wall-clock time. + +**Definition of Done** +- DoD1: Semaphore utility implemented. +- DoD2: Store and restore support configurable concurrency. +- DoD3: All tests pass at concurrency: 1. +- DoD4: Parallel tests added and green. +- DoD5: Benchmark script demonstrates speedup. 
+ +**Blocking** +- Blocks: None + +**Blocked By** +- Blocked by: Task 14.2 (restoreStream) + +--- + +## Task 14.4: Migrate CLI and TUI to ObservabilityPort + +**User Story** +As a CLI user, I want progress bars and stats to work with the new observability system so the terminal experience is unchanged after the v4 migration. + +**Requirements** +- R1: Refactor `bin/ui/progress.js` to subscribe to `ObservabilityPort` metrics instead of EventEmitter events. +- R2: Progress trackers use `observability.metric('chunk', ...)` events for progress updates. +- R3: CLI `store` and `restore` commands wire the observability adapter into CasService via the facade. +- R4: Dashboard and other TUI components continue to function (adapt to new metric format if needed). +- R5: `--quiet` flag still works (uses `SilentObserver`). +- R6: Stats summary printed after store/restore when not in quiet mode (throughput, total bytes, elapsed time). + +**Acceptance Criteria** +- AC1: `git cas store` shows progress bar identical to v3.1.0 behavior. +- AC2: `git cas restore` shows progress bar identical to v3.1.0 behavior. +- AC3: `--quiet` suppresses all output. +- AC4: Stats summary displayed after operation completes. +- AC5: Dashboard renders correctly with new observability wiring. + +**Scope** +- In scope: CLI progress migration, stats summary, dashboard adaptation. +- Out of scope: New TUI features, log file output, verbose debug mode. + +**Est. Complexity (LoC)** +- Prod: ~60 (progress refactor ~30, CLI wiring ~20, stats display ~10) +- Tests: ~20 +- Total: ~80 + +**Est. Human Working Hours** +- ~3h + +**Test Plan** +- Golden path: + - Store with progress → verify metric events drive progress display. + - Restore with progress → same. + - Stats summary printed with correct values. +- Failures: + - None expected (thin adapter layer). +- Edges: + - Quiet mode → SilentObserver, no output. + - Pipe mode → no progress, no stats. +- Fuzz/stress: + - None (display layer). 
+ +**Definition of Done** +- DoD1: Progress bars work with ObservabilityPort. +- DoD2: Stats summary displays after operations. +- DoD3: All CLI tests pass. +- DoD4: Dashboard functional with new wiring. + +**Blocking** +- Blocks: None + +**Blocked By** +- Blocked by: Task 14.1 (ObservabilityPort) --- @@ -235,7 +499,7 @@ Shine (v2.2.0) │ │ --- -## Task 8.1: Streaming restore +## Task 8.1: Streaming restore *(superseded by Task 14.2)* **User Story** As a developer restoring large files, I want a streaming restore path so I don't buffer the entire file in memory. diff --git a/bin/git-cas.js b/bin/git-cas.js index 47cdc014..f1385814 100755 --- a/bin/git-cas.js +++ b/bin/git-cas.js @@ -3,7 +3,7 @@ import { readFileSync } from 'node:fs'; import { program } from 'commander'; import GitPlumbing, { ShellRunnerFactory } from '@git-stunts/plumbing'; -import ContentAddressableStore from '../index.js'; +import ContentAddressableStore, { EventEmitterObserver } from '../index.js'; import Manifest from '../src/domain/value-objects/Manifest.js'; import { createStoreProgress, createRestoreProgress } from './ui/progress.js'; import { renderEncryptionCard } from './ui/encryption-card.js'; @@ -14,7 +14,7 @@ import { renderHeatmap } from './ui/heatmap.js'; program .name('git-cas') .description('Content Addressable Storage backed by Git') - .version('3.1.0') + .version('4.0.0') .option('-q, --quiet', 'Suppress progress output'); /** @@ -25,12 +25,12 @@ function readKeyFile(keyFilePath) { } /** - * Create a CAS instance for the given working directory. + * Create a CAS instance for the given working directory with an optional observability adapter. 
*/ -function createCas(cwd) { +function createCas(cwd, { observability } = {}) { const runner = ShellRunnerFactory.create(); const plumbing = new GitPlumbing({ runner, cwd }); - return new ContentAddressableStore({ plumbing }); + return new ContentAddressableStore({ plumbing, observability }); } /** @@ -104,18 +104,18 @@ program .option('--cwd ', 'Git working directory', '.') .action(async (file, opts) => { try { - const cas = createCas(opts.cwd); + const observer = new EventEmitterObserver(); + const cas = createCas(opts.cwd, { observability: observer }); const encryptionKey = await resolveEncryptionKey(cas, opts); const storeOpts = { filePath: file, slug: opts.slug }; if (encryptionKey) { storeOpts.encryptionKey = encryptionKey; } - const service = await cas.getService(); const progress = createStoreProgress({ filePath: file, chunkSize: cas.chunkSize, quiet: program.opts().quiet, }); - progress.attach(service); + progress.attach(observer); let manifest; try { manifest = await cas.storeFile(storeOpts); @@ -202,7 +202,8 @@ program .action(async (opts) => { try { validateRestoreFlags(opts); - const cas = createCas(opts.cwd); + const observer = new EventEmitterObserver(); + const cas = createCas(opts.cwd, { observability: observer }); const treeOid = opts.oid || await cas.resolveVaultEntry({ slug: opts.slug }); const manifest = await cas.readManifest({ treeOid }); @@ -212,11 +213,10 @@ program restoreOpts.encryptionKey = encryptionKey; } - const service = await cas.getService(); const progress = createRestoreProgress({ totalChunks: manifest.chunks.length, quiet: program.opts().quiet, }); - progress.attach(service); + progress.attach(observer); let bytesWritten; try { ({ bytesWritten } = await cas.restoreFile({ diff --git a/examples/progress-tracking.js b/examples/progress-tracking.js index 44b5dbb8..c45d6fe4 100755 --- a/examples/progress-tracking.js +++ b/examples/progress-tracking.js @@ -1,9 +1,9 @@ #!/usr/bin/env node /** - * Progress tracking demonstration using 
EventEmitter + * Progress tracking demonstration using EventEmitterObserver * * This example shows: - * 1. Accessing the CasService to attach event listeners + * 1. Creating an EventEmitterObserver for observability * 2. Tracking chunk-by-chunk progress during store * 3. Tracking chunk-by-chunk progress during restore * 4. Building a real-time progress indicator @@ -16,7 +16,7 @@ import { execSync } from 'node:child_process'; import path from 'node:path'; import os from 'node:os'; import GitPlumbing from '@git-stunts/plumbing'; -import ContentAddressableStore from '@git-stunts/git-cas'; +import ContentAddressableStore, { EventEmitterObserver } from '@git-stunts/git-cas'; console.log('=== Progress Tracking Example ===\n'); @@ -25,8 +25,9 @@ const repoDir = mkdtempSync(path.join(os.tmpdir(), 'cas-progress-')); console.log(`Created temporary repository: ${repoDir}`); execSync('git init --bare', { cwd: repoDir, stdio: 'ignore' }); -// Initialize plumbing and CAS +// Initialize plumbing and CAS with an EventEmitterObserver const plumbing = GitPlumbing.createDefault({ cwd: repoDir }); +const observer = new EventEmitterObserver(); const cas = ContentAddressableStore.createJson({ plumbing, chunkSize: 128 * 1024 }); // 128 KB chunks // Create a larger test file to see multiple chunks @@ -41,55 +42,52 @@ console.log(`File size: ${fileSize.toLocaleString()} bytes`); console.log(`Chunk size: ${(128 * 1024).toLocaleString()} bytes`); console.log(`Expected chunks: ~${Math.ceil(fileSize / (128 * 1024))}`); -// Get the CasService to attach event listeners -const service = await cas.getService(); - // Progress tracker state const progress = { store: { chunks: 0, bytes: 0 }, restore: { chunks: 0, bytes: 0 } }; -// Event listeners for storage operations +// Event listeners for storage operations — subscribe on the observer console.log('\n--- Setting up event listeners ---'); -service.on('chunk:stored', (event) => { +observer.on('chunk:stored', (event) => { progress.store.chunks++; 
progress.store.bytes += event.size; console.log(`[STORE] Chunk ${event.index} stored: ${event.size.toLocaleString()} bytes (digest: ${event.digest.substring(0, 8)}...)`); }); -service.on('file:stored', (event) => { +observer.on('file:stored', (event) => { console.log(`[STORE] File complete: ${event.slug}`); console.log(` Total size: ${event.size.toLocaleString()} bytes`); console.log(` Total chunks: ${event.chunkCount}`); console.log(` Encrypted: ${event.encrypted ? 'Yes' : 'No'}`); }); -service.on('chunk:restored', (event) => { +observer.on('chunk:restored', (event) => { progress.restore.chunks++; progress.restore.bytes += event.size; console.log(`[RESTORE] Chunk ${event.index} restored: ${event.size.toLocaleString()} bytes (digest: ${event.digest.substring(0, 8)}...)`); }); -service.on('file:restored', (event) => { +observer.on('file:restored', (event) => { console.log(`[RESTORE] File complete: ${event.slug}`); console.log(` Total size: ${event.size.toLocaleString()} bytes`); console.log(` Total chunks: ${event.chunkCount}`); }); -service.on('integrity:pass', (event) => { +observer.on('integrity:pass', (event) => { console.log(`[INTEGRITY] Passed for: ${event.slug}`); }); -service.on('integrity:fail', (event) => { +observer.on('integrity:fail', (event) => { console.error(`[INTEGRITY] FAILED for: ${event.slug}`); console.error(` Chunk index: ${event.chunkIndex}`); console.error(` Expected: ${event.expected}`); console.error(` Actual: ${event.actual}`); }); -service.on('error', (event) => { +observer.on('error', (event) => { console.error(`[ERROR] ${event.code}: ${event.message}`); }); @@ -103,9 +101,16 @@ console.log(' - integrity:fail'); console.log(' - error'); // Step 1: Store the file with progress tracking +// NOTE: We pass the observer to createJson options for the CAS to use it +const cas2 = new ContentAddressableStore({ + plumbing: GitPlumbing.createDefault({ cwd: repoDir }), + chunkSize: 128 * 1024, + observability: observer, +}); + console.log('\n--- 
Step 1: Storing file (watch for chunk events) ---\n'); const startStore = Date.now(); -const manifest = await cas.storeFile({ +const manifest = await cas2.storeFile({ filePath: testFilePath, slug: 'progress-demo', filename: 'large-file.bin' @@ -128,7 +133,7 @@ console.log(`Throughput: ${storeThroughputMBps.toFixed(2)} MB/s`); // Step 2: Restore the file with progress tracking console.log('\n--- Step 2: Restoring file (watch for chunk events) ---\n'); const startRestore = Date.now(); -const { buffer, bytesWritten } = await cas.restore({ manifest }); +const { buffer, bytesWritten } = await cas2.restore({ manifest }); const restoreTime = Date.now() - startRestore; console.log(`\nRestore completed in ${restoreTime}ms`); @@ -152,7 +157,7 @@ if (!contentMatches) { // Step 3: Run integrity verification with events console.log('\n--- Step 3: Integrity verification (watch for events) ---\n'); const startVerify = Date.now(); -const isValid = await cas.verifyIntegrity(manifest); +const isValid = await cas2.verifyIntegrity(manifest); const verifyTime = Date.now() - startVerify; console.log(`\nIntegrity verification completed in ${verifyTime}ms`); @@ -183,13 +188,13 @@ const progressListener = (event) => { process.stdout.write(`\rProgress: [${progressBar}] ${percentage}% (${storeChunkCount}/${totalChunks} chunks)`); }; -service.on('chunk:stored', progressListener); +observer.on('chunk:stored', progressListener); // Store another test file const testFilePath2 = path.join(testDir, 'progress-demo.bin'); writeFileSync(testFilePath2, randomBytes(fileSize)); -const manifest2 = await cas.storeFile({ +const manifest2 = await cas2.storeFile({ filePath: testFilePath2, slug: 'progress-demo-2', filename: 'progress-demo.bin' @@ -199,7 +204,7 @@ console.log('\n\nProgress tracking complete!'); console.log(`Final chunk count: ${storeChunkCount}`); // Remove the progress listener to avoid cluttering output -service.removeListener('chunk:stored', progressListener); 
+observer.removeListener('chunk:stored', progressListener); // Summary statistics console.log('\n--- Performance Summary ---'); @@ -224,7 +229,8 @@ console.log('Temporary files removed'); console.log('\n=== Example completed successfully! ==='); console.log('\nKey takeaways:'); -console.log('- Access CasService via cas.getService() for events'); +console.log('- Create an EventEmitterObserver and pass it to ContentAddressableStore'); +console.log('- Subscribe to events on the observer (not the service)'); console.log('- chunk:stored fires for each chunk during storage'); console.log('- chunk:restored fires for each chunk during restore'); console.log('- file:stored and file:restored fire when operations complete'); diff --git a/index.d.ts b/index.d.ts index 26c0fc4f..71da27db 100644 --- a/index.d.ts +++ b/index.d.ts @@ -11,13 +11,14 @@ import type { CryptoPort, CodecPort, GitPersistencePort, + ObservabilityPort, CasServiceOptions, DeriveKeyOptions, DeriveKeyResult, } from "./src/domain/services/CasService.js"; export { CasService, Manifest, Chunk }; -export type { EncryptionMeta, ManifestData, CompressionMeta, KdfParams, SubManifestRef, CryptoPort, CodecPort, GitPersistencePort, CasServiceOptions, DeriveKeyOptions, DeriveKeyResult }; +export type { EncryptionMeta, ManifestData, CompressionMeta, KdfParams, SubManifestRef, CryptoPort, CodecPort, GitPersistencePort, ObservabilityPort, CasServiceOptions, DeriveKeyOptions, DeriveKeyResult }; /** Abstract port for cryptographic operations. */ export declare class CryptoPortBase { @@ -93,14 +94,47 @@ export declare class CborCodec extends CodecPortBase { constructor(); } +/** No-op observability adapter. */ +export declare class SilentObserver { + metric(channel: string, data: Record<string, unknown>): void; + log(level: string, msg: string, meta?: Record<string, unknown>): void; + span(name: string): { end(meta?: Record<string, unknown>): void }; +} + +/** EventEmitter-based observability adapter for backward compatibility. 
*/ +export declare class EventEmitterObserver { + metric(channel: string, data: Record): void; + log(level: string, msg: string, meta?: Record): void; + span(name: string): { end(meta?: Record): void }; + on(event: string, listener: (...args: unknown[]) => void): this; + removeListener(event: string, listener: (...args: unknown[]) => void): this; + listenerCount(event: string): number; +} + +/** Stats-collecting observability adapter. */ +export declare class StatsCollector { + metric(channel: string, data: Record): void; + log(level: string, msg: string, meta?: Record): void; + span(name: string): { end(meta?: Record): void }; + summary(): { + chunksProcessed: number; + bytesTotal: number; + elapsed: number; + throughput: number; + errors: number; + }; +} + /** Constructor options for {@link ContentAddressableStore}. */ export interface ContentAddressableStoreOptions { plumbing: unknown; chunkSize?: number; codec?: CodecPort; crypto?: CryptoPort; + observability?: ObservabilityPort; policy?: unknown; merkleThreshold?: number; + concurrency?: number; } /** A single vault entry. */ @@ -257,6 +291,12 @@ export default class ContentAddressableStore { passphrase?: string; }): Promise<{ buffer: Buffer; bytesWritten: number }>; + restoreStream(options: { + manifest: Manifest; + encryptionKey?: Buffer; + passphrase?: string; + }): AsyncIterable; + createTree(options: { manifest: Manifest }): Promise; verifyIntegrity(manifest: Manifest): Promise; diff --git a/index.js b/index.js index e7226fe9..71f3b8dd 100644 --- a/index.js +++ b/index.js @@ -3,8 +3,11 @@ * @fileoverview Content Addressable Store - Managed blob storage in Git. 
*/ -import { createReadStream, writeFileSync } from 'node:fs'; +import { createReadStream } from 'node:fs'; import path from 'node:path'; +import { Readable, Transform } from 'node:stream'; +import { pipeline } from 'node:stream/promises'; +import { createWriteStream } from 'node:fs'; import CasService from './src/domain/services/CasService.js'; import VaultService from './src/domain/services/VaultService.js'; import GitPersistenceAdapter from './src/infrastructure/adapters/GitPersistenceAdapter.js'; @@ -13,8 +16,12 @@ import NodeCryptoAdapter from './src/infrastructure/adapters/NodeCryptoAdapter.j import Manifest from './src/domain/value-objects/Manifest.js'; import Chunk from './src/domain/value-objects/Chunk.js'; import CryptoPort from './src/ports/CryptoPort.js'; +import ObservabilityPort from './src/ports/ObservabilityPort.js'; import JsonCodec from './src/infrastructure/codecs/JsonCodec.js'; import CborCodec from './src/infrastructure/codecs/CborCodec.js'; +import SilentObserver from './src/infrastructure/adapters/SilentObserver.js'; +import EventEmitterObserver from './src/infrastructure/adapters/EventEmitterObserver.js'; +import StatsCollector from './src/infrastructure/adapters/StatsCollector.js'; export { CasService, @@ -23,10 +30,14 @@ export { GitRefAdapter, NodeCryptoAdapter, CryptoPort, + ObservabilityPort, Manifest, Chunk, JsonCodec, - CborCodec + CborCodec, + SilentObserver, + EventEmitterObserver, + StatsCollector, }; /** @@ -58,16 +69,20 @@ export default class ContentAddressableStore { * @param {number} [options.chunkSize] - Chunk size in bytes (default 256 KiB). * @param {import('./src/ports/CodecPort.js').default} [options.codec] - Manifest codec (default JsonCodec). * @param {import('./src/ports/CryptoPort.js').default} [options.crypto] - Crypto adapter (auto-detected if omitted). + * @param {import('./src/ports/ObservabilityPort.js').default} [options.observability] - Observability adapter (SilentObserver if omitted). 
* @param {import('@git-stunts/alfred').Policy} [options.policy] - Resilience policy for Git I/O. * @param {number} [options.merkleThreshold=1000] - Chunk count threshold for Merkle manifests. + * @param {number} [options.concurrency=1] - Maximum parallel chunk I/O operations. */ - constructor({ plumbing, chunkSize, codec, policy, crypto, merkleThreshold }) { + constructor({ plumbing, chunkSize, codec, policy, crypto, observability, merkleThreshold, concurrency }) { this.plumbing = plumbing; this.chunkSizeConfig = chunkSize; this.codecConfig = codec; this.policyConfig = policy; this.cryptoConfig = crypto; + this.observabilityConfig = observability; this.merkleThresholdConfig = merkleThreshold; + this.concurrencyConfig = concurrency; this.service = null; this.#servicePromise = null; } @@ -104,7 +119,9 @@ export default class ContentAddressableStore { chunkSize: this.chunkSizeConfig, codec: this.codecConfig || new JsonCodec(), crypto, + observability: this.observabilityConfig || new SilentObserver(), merkleThreshold: this.merkleThresholdConfig, + concurrency: this.concurrencyConfig, }); const ref = new GitRefAdapter({ @@ -253,12 +270,17 @@ export default class ContentAddressableStore { */ async restoreFile({ manifest, encryptionKey, passphrase, outputPath }) { const service = await this.#getService(); - const { buffer, bytesWritten } = await service.restore({ - manifest, - encryptionKey, - passphrase, + const iterable = service.restoreStream({ manifest, encryptionKey, passphrase }); + const readable = Readable.from(iterable); + const writable = createWriteStream(outputPath); + let bytesWritten = 0; + const counter = new Transform({ + transform(chunk, _encoding, cb) { + bytesWritten += chunk.length; + cb(null, chunk); + }, }); - writeFileSync(outputPath, buffer); + await pipeline(readable, counter, writable); return { bytesWritten }; } @@ -275,6 +297,19 @@ export default class ContentAddressableStore { return await service.restore(options); } + /** + * Restores a file 
from its manifest as an async iterable of Buffer chunks. + * @param {Object} options + * @param {import('./src/domain/value-objects/Manifest.js').default} options.manifest - The file manifest. + * @param {Buffer} [options.encryptionKey] - 32-byte key, required if manifest is encrypted. + * @param {string} [options.passphrase] - Passphrase for KDF-based decryption. + * @returns {AsyncIterable} + */ + async *restoreStream(options) { + const service = await this.#getService(); + yield* service.restoreStream(options); + } + /** * Creates a Git tree object from a manifest. * @param {Object} options diff --git a/jsr.json b/jsr.json index 9ea68f27..b6a6b323 100644 --- a/jsr.json +++ b/jsr.json @@ -1,6 +1,6 @@ { "name": "@git-stunts/git-cas", - "version": "3.1.0", + "version": "4.0.0", "exports": { ".": "./index.js", "./service": "./src/domain/services/CasService.js", diff --git a/package.json b/package.json index 14323376..6466daf6 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@git-stunts/git-cas", - "version": "3.1.0", + "version": "4.0.0", "description": "Content-addressed storage backed by Git's object database, with optional encryption and pluggable codecs", "type": "module", "main": "index.js", diff --git a/src/domain/services/CasService.d.ts b/src/domain/services/CasService.d.ts index a98fc2cb..7694e0c7 100644 --- a/src/domain/services/CasService.d.ts +++ b/src/domain/services/CasService.d.ts @@ -3,7 +3,6 @@ * Domain service for Content Addressable Storage operations. */ -import { EventEmitter } from "node:events"; import Manifest from "../value-objects/Manifest.js"; import type { EncryptionMeta, CompressionMeta, KdfParams } from "../value-objects/Manifest.js"; @@ -40,13 +39,22 @@ export interface GitPersistencePort { ): Promise>; } +/** Port interface for observability (metrics, logging, tracing). 
*/ +export interface ObservabilityPort { + metric(channel: string, data: Record): void; + log(level: "debug" | "info" | "warn" | "error", msg: string, meta?: Record): void; + span(name: string): { end(meta?: Record): void }; +} + /** Constructor options for {@link CasService}. */ export interface CasServiceOptions { persistence: GitPersistencePort; codec: CodecPort; crypto: CryptoPort; + observability: ObservabilityPort; chunkSize?: number; merkleThreshold?: number; + concurrency?: number; } /** Options for key derivation. */ @@ -74,12 +82,14 @@ export interface DeriveKeyResult { * Provides chunking, encryption, and integrity verification for storing * arbitrary data in Git's object database. */ -export default class CasService extends EventEmitter { +export default class CasService { readonly persistence: GitPersistencePort; readonly codec: CodecPort; readonly crypto: CryptoPort; + readonly observability: ObservabilityPort; readonly chunkSize: number; readonly merkleThreshold: number; + readonly concurrency: number; constructor(options: CasServiceOptions); @@ -112,6 +122,12 @@ export default class CasService extends EventEmitter { passphrase?: string; }): Promise<{ buffer: Buffer; bytesWritten: number }>; + restoreStream(options: { + manifest: Manifest; + encryptionKey?: Buffer; + passphrase?: string; + }): AsyncIterable; + readManifest(options: { treeOid: string }): Promise; deleteAsset(options: { diff --git a/src/domain/services/CasService.js b/src/domain/services/CasService.js index 3e5e0703..b20e81cd 100644 --- a/src/domain/services/CasService.js +++ b/src/domain/services/CasService.js @@ -3,12 +3,12 @@ * @fileoverview Domain service for Content Addressable Storage operations. 
* @module */ -import { EventEmitter } from 'node:events'; import { gunzip, createGzip } from 'node:zlib'; import { Readable } from 'node:stream'; import { promisify } from 'node:util'; import Manifest from '../value-objects/Manifest.js'; import CasError from '../errors/CasError.js'; +import Semaphore from './Semaphore.js'; const gunzipAsync = promisify(gunzip); @@ -16,39 +16,36 @@ const gunzipAsync = promisify(gunzip); * Domain service for Content Addressable Storage operations. * * Provides chunking, encryption, and integrity verification for storing - * arbitrary data in Git's object database. Extends {@link EventEmitter} to - * emit progress events during store/restore operations. - * - * @fires CasService#chunk:stored - * @fires CasService#chunk:restored - * @fires CasService#file:stored - * @fires CasService#file:restored - * @fires CasService#integrity:pass - * @fires CasService#integrity:fail - * @fires CasService#error + * arbitrary data in Git's object database. */ -export default class CasService extends EventEmitter { +export default class CasService { /** * @param {Object} options * @param {import('../../ports/GitPersistencePort.js').default} options.persistence * @param {import('../../ports/CodecPort.js').default} options.codec * @param {import('../../ports/CryptoPort.js').default} options.crypto + * @param {import('../../ports/ObservabilityPort.js').default} options.observability * @param {number} [options.chunkSize=262144] - 256 KiB * @param {number} [options.merkleThreshold=1000] - Chunk count threshold for Merkle manifests. + * @param {number} [options.concurrency=1] - Maximum parallel chunk I/O operations. 
*/ - constructor({ persistence, codec, crypto, chunkSize = 256 * 1024, merkleThreshold = 1000 }) { - super(); + constructor({ persistence, codec, crypto, observability, chunkSize = 256 * 1024, merkleThreshold = 1000, concurrency = 1 }) { if (chunkSize < 1024) { throw new Error('Chunk size must be at least 1024 bytes'); } this.persistence = persistence; this.codec = codec; this.crypto = crypto; + this.observability = observability; this.chunkSize = chunkSize; if (!Number.isInteger(merkleThreshold) || merkleThreshold < 1) { throw new Error('Merkle threshold must be a positive integer'); } this.merkleThreshold = merkleThreshold; + if (!Number.isInteger(concurrency) || concurrency < 1) { + throw new Error('Concurrency must be a positive integer'); + } + this.concurrency = concurrency; } /** @@ -62,18 +59,17 @@ export default class CasService extends EventEmitter { } /** - * Stores a single buffer chunk in Git and appends its metadata to the manifest. + * Stores a single buffer chunk in Git, returning its metadata. * @private * @param {Buffer} buf - The chunk data to store. - * @param {Object} manifestData - Mutable manifest accumulator. + * @param {number} index - Chunk index. 
+ * @returns {Promise<{ index: number, size: number, digest: string, blob: string }>} */ - async _storeChunk(buf, manifestData) { + async _storeChunk(buf, index) { const digest = await this._sha256(buf); const blob = await this.persistence.writeBlob(buf); - const entry = { index: manifestData.chunks.length, size: buf.length, digest, blob }; - manifestData.chunks.push(entry); - manifestData.size += buf.length; - this.emit('chunk:stored', { index: entry.index, size: entry.size, digest, blob }); + this.observability.metric('chunk', { action: 'stored', index, size: buf.length, digest, blob }); + return { index, size: buf.length, digest, blob }; } /** @@ -85,12 +81,26 @@ export default class CasService extends EventEmitter { */ async _chunkAndStore(source, manifestData) { let buffer = Buffer.alloc(0); + const sem = new Semaphore(this.concurrency); + const pending = []; + let nextIndex = 0; + + const launchWrite = (buf, idx) => { + const p = sem.acquire().then(async () => { + try { + return await this._storeChunk(buf, idx); + } finally { + sem.release(); + } + }); + pending.push(p); + }; try { for await (const chunk of source) { buffer = Buffer.concat([buffer, chunk]); while (buffer.length >= this.chunkSize) { - await this._storeChunk(buffer.slice(0, this.chunkSize), manifestData); + launchWrite(buffer.slice(0, this.chunkSize), nextIndex++); buffer = buffer.slice(this.chunkSize); } } @@ -99,16 +109,21 @@ export default class CasService extends EventEmitter { const casErr = new CasError( `Stream error during store: ${err.message}`, 'STREAM_ERROR', - { chunksWritten: manifestData.chunks.length, originalError: err }, + { chunksWritten: nextIndex, originalError: err }, ); - if (this.listenerCount('error') > 0) { - this.emit('error', { code: casErr.code, message: casErr.message }); - } + this.observability.metric('error', { code: casErr.code, message: casErr.message }); throw casErr; } if (buffer.length > 0) { - await this._storeChunk(buffer, manifestData); + 
launchWrite(buffer, nextIndex++); + } + + const results = await Promise.all(pending); + results.sort((a, b) => a.index - b.index); + for (const entry of results) { + manifestData.chunks.push(entry); + manifestData.size += entry.size; } } @@ -262,8 +277,8 @@ export default class CasService extends EventEmitter { } const manifest = new Manifest(manifestData); - this.emit('file:stored', { - slug, size: manifest.size, chunkCount: manifest.chunks.length, encrypted: !!encryptionKey, + this.observability.metric('file', { + action: 'stored', slug, size: manifest.size, chunkCount: manifest.chunks.length, encrypted: !!encryptionKey, }); return manifest; } @@ -366,13 +381,11 @@ export default class CasService extends EventEmitter { 'INTEGRITY_ERROR', { chunkIndex: chunk.index, expected: chunk.digest, actual: digest }, ); - if (this.listenerCount('error') > 0) { - this.emit('error', { code: err.code, message: err.message }); - } + this.observability.metric('error', { code: err.code, message: err.message }); throw err; } buffers.push(blob); - this.emit('chunk:restored', { index: chunk.index, size: blob.length, digest: chunk.digest }); + this.observability.metric('chunk', { action: 'restored', index: chunk.index, size: blob.length, digest: chunk.digest }); } return buffers; } @@ -436,12 +449,48 @@ export default class CasService extends EventEmitter { * @throws {CasError} INTEGRITY_ERROR if chunk verification or decryption fails. */ async restore({ manifest, encryptionKey, passphrase }) { + const chunks = []; + for await (const chunk of this.restoreStream({ manifest, encryptionKey, passphrase })) { + chunks.push(chunk); + } + const buffer = Buffer.concat(chunks); + return { buffer, bytesWritten: buffer.length }; + } + + /** + * Restores a file from its manifest as an async iterable of Buffer chunks. + * + * For unencrypted, uncompressed files this is true per-chunk streaming + * with O(chunkSize) memory. 
For encrypted or compressed files, all chunks + * are buffered internally for decryption/decompression, then yielded. + * + * @param {Object} options + * @param {import('../value-objects/Manifest.js').default} options.manifest - The file manifest. + * @param {Buffer} [options.encryptionKey] - 32-byte key, required if manifest is encrypted. + * @param {string} [options.passphrase] - Passphrase for KDF-based decryption. + * @yields {Buffer} + * @throws {CasError} MISSING_KEY if manifest is encrypted but no key is provided. + * @throws {CasError} INTEGRITY_ERROR if chunk verification or decryption fails. + */ + async *restoreStream({ manifest, encryptionKey, passphrase }) { const key = await this._resolveEncryptionKey(manifest, encryptionKey, passphrase); if (manifest.chunks.length === 0) { - return { buffer: Buffer.alloc(0), bytesWritten: 0 }; + return; } + if (manifest.encryption?.encrypted || manifest.compression) { + yield* this._restoreBuffered(manifest, key); + } else { + yield* this._restoreStreaming(manifest); + } + } + + /** + * Buffered restore path for encrypted/compressed manifests. 
+ * @private + */ + async *_restoreBuffered(manifest, key) { let buffer = Buffer.concat(await this._readAndVerifyChunks(manifest.chunks)); if (manifest.encryption?.encrypted) { @@ -449,18 +498,74 @@ export default class CasService extends EventEmitter { } if (manifest.compression) { - try { - buffer = await gunzipAsync(buffer); - } catch (err) { - if (err instanceof CasError) { throw err; } - throw new CasError(`Decompression failed: ${err.message}`, 'INTEGRITY_ERROR', { originalError: err }); + buffer = await this._decompress(buffer); + } + + this.observability.metric('file', { + action: 'restored', slug: manifest.slug, size: buffer.length, chunkCount: manifest.chunks.length, + }); + + for (let offset = 0; offset < buffer.length; offset += this.chunkSize) { + yield buffer.subarray(offset, offset + this.chunkSize); + } + } + + /** + * Per-chunk streaming restore with read-ahead. + * @private + */ + async *_restoreStreaming(manifest) { + const chunks = manifest.chunks; + const N = this.concurrency; + let totalSize = 0; + + const readAndVerify = async (chunk) => { + const blob = await this.persistence.readBlob(chunk.blob); + const digest = await this._sha256(blob); + if (digest !== chunk.digest) { + const err = new CasError( + `Chunk ${chunk.index} integrity check failed`, + 'INTEGRITY_ERROR', + { chunkIndex: chunk.index, expected: chunk.digest, actual: digest }, + ); + this.observability.metric('error', { code: err.code, message: err.message }); + throw err; + } + return blob; + }; + + const ahead = []; + for (let i = 0; i < Math.min(N, chunks.length); i++) { + ahead.push(readAndVerify(chunks[i])); + } + + for (let i = 0; i < chunks.length; i++) { + const blob = await ahead[i % N]; + this.observability.metric('chunk', { action: 'restored', index: chunks[i].index, size: blob.length, digest: chunks[i].digest }); + totalSize += blob.length; + const nextIdx = i + N; + if (nextIdx < chunks.length) { + ahead[i % N] = readAndVerify(chunks[nextIdx]); } + yield blob; } - 
this.emit('file:restored', { - slug: manifest.slug, size: buffer.length, chunkCount: manifest.chunks.length, + this.observability.metric('file', { + action: 'restored', slug: manifest.slug, size: totalSize, chunkCount: chunks.length, }); - return { buffer, bytesWritten: buffer.length }; + } + + /** + * Decompresses a gzip buffer. + * @private + */ + async _decompress(buffer) { + try { + return await gunzipAsync(buffer); + } catch (err) { + if (err instanceof CasError) { throw err; } + throw new CasError(`Decompression failed: ${err.message}`, 'INTEGRITY_ERROR', { originalError: err }); + } } /** @@ -619,13 +724,13 @@ export default class CasService extends EventEmitter { const blob = await this.persistence.readBlob(chunk.blob); const digest = await this._sha256(blob); if (digest !== chunk.digest) { - this.emit('integrity:fail', { - slug: manifest.slug, chunkIndex: chunk.index, expected: chunk.digest, actual: digest, + this.observability.metric('integrity', { + action: 'fail', slug: manifest.slug, chunkIndex: chunk.index, expected: chunk.digest, actual: digest, }); return false; } } - this.emit('integrity:pass', { slug: manifest.slug }); + this.observability.metric('integrity', { action: 'pass', slug: manifest.slug }); return true; } } diff --git a/src/domain/services/Semaphore.js b/src/domain/services/Semaphore.js new file mode 100644 index 00000000..274c50a1 --- /dev/null +++ b/src/domain/services/Semaphore.js @@ -0,0 +1,44 @@ +/** + * Counting semaphore for limiting concurrency. + */ +export default class Semaphore { + #max; + #active = 0; + #queue = []; + + /** + * @param {number} concurrency - Maximum concurrent permits. + */ + constructor(concurrency) { + if (!Number.isInteger(concurrency) || concurrency < 1) { + throw new Error('Semaphore concurrency must be a positive integer'); + } + this.#max = concurrency; + } + + /** + * Acquire a permit, waiting if necessary. 
+ * @returns {Promise} + */ + acquire() { + if (this.#active < this.#max) { + this.#active++; + return Promise.resolve(); + } + return new Promise((resolve) => { + this.#queue.push(resolve); + }); + } + + /** + * Release a permit. + */ + release() { + if (this.#queue.length > 0) { + const next = this.#queue.shift(); + next(); + } else { + this.#active--; + } + } +} diff --git a/src/infrastructure/adapters/EventEmitterObserver.js b/src/infrastructure/adapters/EventEmitterObserver.js new file mode 100644 index 00000000..82467a67 --- /dev/null +++ b/src/infrastructure/adapters/EventEmitterObserver.js @@ -0,0 +1,70 @@ +/** + * Observability adapter that bridges metrics to an EventEmitter. + * + * Maps `metric('chunk', { action: 'stored', ... })` → `emit('chunk:stored', ...)`. + * Exposes `.on()`, `.removeListener()`, `.listenerCount()` for backward compatibility + * with code that previously subscribed directly to CasService events. + */ +import { EventEmitter } from 'node:events'; + +export default class EventEmitterObserver { + #emitter = new EventEmitter(); + + /** + * Route a metric to the underlying EventEmitter. + * + * Error metrics are only emitted when listeners are attached (matching + * the previous CasService behavior that guarded `this.emit('error', ...)`). + * + * @param {string} channel + * @param {Object} data - Must include `action` to form the event name. + */ + metric(channel, data) { + const eventName = `${channel}:${data.action}`; + if (channel === 'error') { + if (this.#emitter.listenerCount('error') > 0) { + this.#emitter.emit('error', data); + } + return; + } + const payload = Object.fromEntries(Object.entries(data).filter(([k]) => k !== 'action')); + this.#emitter.emit(eventName, payload); + } + + log(_level, _msg, _meta) {} + + span(_name) { + return { end() {} }; + } + + /** + * Subscribe to an event. 
+ * @param {string} event + * @param {Function} listener + * @returns {this} + */ + on(event, listener) { + this.#emitter.on(event, listener); + return this; + } + + /** + * Remove a listener. + * @param {string} event + * @param {Function} listener + * @returns {this} + */ + removeListener(event, listener) { + this.#emitter.removeListener(event, listener); + return this; + } + + /** + * Return the number of listeners for an event. + * @param {string} event + * @returns {number} + */ + listenerCount(event) { + return this.#emitter.listenerCount(event); + } +} diff --git a/src/infrastructure/adapters/SilentObserver.js b/src/infrastructure/adapters/SilentObserver.js new file mode 100644 index 00000000..e05e4158 --- /dev/null +++ b/src/infrastructure/adapters/SilentObserver.js @@ -0,0 +1,11 @@ +/** + * No-op observability adapter. All methods are empty. + * Used as the default when no observability is configured. + */ +export default class SilentObserver { + metric(_channel, _data) {} + log(_level, _msg, _meta) {} + span(_name) { + return { end() {} }; + } +} diff --git a/src/infrastructure/adapters/StatsCollector.js b/src/infrastructure/adapters/StatsCollector.js new file mode 100644 index 00000000..6d87f9c9 --- /dev/null +++ b/src/infrastructure/adapters/StatsCollector.js @@ -0,0 +1,44 @@ +/** + * Observability adapter that accumulates metrics for later retrieval. + */ +export default class StatsCollector { + #chunksProcessed = 0; + #bytesTotal = 0; + #errors = 0; + #startTime = null; + + metric(channel, data) { + if (!this.#startTime) { + this.#startTime = Date.now(); + } + if (channel === 'chunk') { + this.#chunksProcessed++; + this.#bytesTotal += data.size || 0; + } + if (channel === 'error') { + this.#errors++; + } + } + + log(_level, _msg, _meta) {} + + span(_name) { + return { end() {} }; + } + + /** + * Returns accumulated statistics. 
+ * @returns {{ chunksProcessed: number, bytesTotal: number, elapsed: number, throughput: number, errors: number }} + */ + summary() { + const elapsed = this.#startTime ? (Date.now() - this.#startTime) / 1000 : 0; + const throughput = elapsed > 0 ? this.#bytesTotal / elapsed : 0; + return { + chunksProcessed: this.#chunksProcessed, + bytesTotal: this.#bytesTotal, + elapsed, + throughput, + errors: this.#errors, + }; + } +} diff --git a/src/ports/ObservabilityPort.js b/src/ports/ObservabilityPort.js new file mode 100644 index 00000000..8236d1fe --- /dev/null +++ b/src/ports/ObservabilityPort.js @@ -0,0 +1,33 @@ +/** + * Abstract port for observability (metrics, logging, tracing). + * @abstract + */ +export default class ObservabilityPort { + /** + * Emit a metric on the given channel. + * @param {string} _channel - Metric channel (e.g. 'chunk', 'file', 'integrity', 'error'). + * @param {Object} _data - Metric payload. + */ + metric(_channel, _data) { + throw new Error('Not implemented'); + } + + /** + * Log a message at the given level. + * @param {'debug'|'info'|'warn'|'error'} _level + * @param {string} _msg + * @param {Object} [_meta] + */ + log(_level, _msg, _meta) { + throw new Error('Not implemented'); + } + + /** + * Start a named span for tracing. 
+ * @param {string} _name + * @returns {{ end(meta?: Object): void }} + */ + span(_name) { + throw new Error('Not implemented'); + } +} diff --git a/test/unit/cli/progress.test.js b/test/unit/cli/progress.test.js index 1e9e8b43..14553706 100644 --- a/test/unit/cli/progress.test.js +++ b/test/unit/cli/progress.test.js @@ -1,5 +1,5 @@ import { describe, it, expect, vi } from 'vitest'; -import EventEmitter from 'node:events'; +import EventEmitterObserver from '../../../src/infrastructure/adapters/EventEmitterObserver.js'; import { makeCtx } from './_testContext.js'; vi.mock('../../../bin/ui/context.js', () => ({ @@ -13,16 +13,16 @@ const FILE_SIZE = 5 * 256 * 1024; describe('createStoreProgress', () => { it('returns no-op when quiet is true', () => { const p = createStoreProgress({ filePath: 'test.bin', chunkSize: 256 * 1024, quiet: true }); - const emitter = new EventEmitter(); + const emitter = new EventEmitterObserver(); p.attach(emitter); - emitter.emit('chunk:stored', { index: 0, size: 256 * 1024 }); + emitter.metric('chunk', { action: 'stored', index: 0, size: 256 * 1024 }); p.detach(); expect(emitter.listenerCount('chunk:stored')).toBe(0); }); it('attaches and detaches from EventEmitter', () => { const p = createStoreProgress({ filePath: 'test.bin', chunkSize: 256 * 1024, quiet: false, fileSize: FILE_SIZE }); - const emitter = new EventEmitter(); + const emitter = new EventEmitterObserver(); p.attach(emitter); expect(emitter.listenerCount('chunk:stored')).toBe(1); p.detach(); @@ -31,10 +31,10 @@ describe('createStoreProgress', () => { it('tracks chunk events without throwing', () => { const p = createStoreProgress({ filePath: 'test.bin', chunkSize: 256 * 1024, quiet: false, fileSize: FILE_SIZE }); - const emitter = new EventEmitter(); + const emitter = new EventEmitterObserver(); p.attach(emitter); for (let i = 0; i < 5; i++) { - emitter.emit('chunk:stored', { index: i, size: 256 * 1024 }); + emitter.metric('chunk', { action: 'stored', index: i, size: 256 * 
1024 }); } p.detach(); expect(emitter.listenerCount('chunk:stored')).toBe(0); @@ -44,7 +44,7 @@ describe('createStoreProgress', () => { describe('createRestoreProgress', () => { it('returns no-op when quiet is true', () => { const p = createRestoreProgress({ totalChunks: 5, quiet: true }); - const emitter = new EventEmitter(); + const emitter = new EventEmitterObserver(); p.attach(emitter); p.detach(); expect(emitter.listenerCount('chunk:restored')).toBe(0); @@ -52,7 +52,7 @@ describe('createRestoreProgress', () => { it('returns no-op for 0-chunk manifests', () => { const p = createRestoreProgress({ totalChunks: 0, quiet: false }); - const emitter = new EventEmitter(); + const emitter = new EventEmitterObserver(); p.attach(emitter); p.detach(); expect(emitter.listenerCount('chunk:restored')).toBe(0); @@ -60,10 +60,10 @@ describe('createRestoreProgress', () => { it('attaches and detaches from EventEmitter', () => { const p = createRestoreProgress({ totalChunks: 3, quiet: false }); - const emitter = new EventEmitter(); + const emitter = new EventEmitterObserver(); p.attach(emitter); expect(emitter.listenerCount('chunk:restored')).toBe(1); - emitter.emit('chunk:restored', { index: 0, size: 256 * 1024 }); + emitter.metric('chunk', { action: 'restored', index: 0, size: 256 * 1024 }); p.detach(); expect(emitter.listenerCount('chunk:restored')).toBe(0); }); diff --git a/test/unit/domain/services/CasService.codec.test.js b/test/unit/domain/services/CasService.codec.test.js index b0fe6d80..61321c38 100644 --- a/test/unit/domain/services/CasService.codec.test.js +++ b/test/unit/domain/services/CasService.codec.test.js @@ -4,6 +4,7 @@ import NodeCryptoAdapter from '../../../../src/infrastructure/adapters/NodeCrypt import JsonCodec from '../../../../src/infrastructure/codecs/JsonCodec.js'; import CborCodec from '../../../../src/infrastructure/codecs/CborCodec.js'; import Manifest from '../../../../src/domain/value-objects/Manifest.js'; +import SilentObserver from 
'../../../../src/infrastructure/adapters/SilentObserver.js'; describe('CasService with Codecs', () => { let mockPersistence; @@ -23,7 +24,7 @@ describe('CasService with Codecs', () => { }); it('uses JsonCodec when injected', async () => { - const service = new CasService({ persistence: mockPersistence, crypto: new NodeCryptoAdapter(), codec: new JsonCodec() }); + const service = new CasService({ persistence: mockPersistence, crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), observability: new SilentObserver() }); await service.createTree({ manifest: dummyManifest }); expect(mockPersistence.writeBlob).toHaveBeenCalledWith(expect.stringContaining('{')); @@ -33,7 +34,7 @@ describe('CasService with Codecs', () => { }); it('uses CborCodec when injected', async () => { - const service = new CasService({ persistence: mockPersistence, crypto: new NodeCryptoAdapter(), codec: new CborCodec() }); + const service = new CasService({ persistence: mockPersistence, crypto: new NodeCryptoAdapter(), codec: new CborCodec(), observability: new SilentObserver() }); await service.createTree({ manifest: dummyManifest }); // CBOR output is binary (Buffer), so we check for Buffer diff --git a/test/unit/domain/services/CasService.compression.test.js b/test/unit/domain/services/CasService.compression.test.js index b860f4fb..f2c544a7 100644 --- a/test/unit/domain/services/CasService.compression.test.js +++ b/test/unit/domain/services/CasService.compression.test.js @@ -3,6 +3,7 @@ import { randomBytes } from 'node:crypto'; import CasService from '../../../../src/domain/services/CasService.js'; import NodeCryptoAdapter from '../../../../src/infrastructure/adapters/NodeCryptoAdapter.js'; import JsonCodec from '../../../../src/infrastructure/codecs/JsonCodec.js'; +import SilentObserver from '../../../../src/infrastructure/adapters/SilentObserver.js'; // --------------------------------------------------------------------------- // Helpers @@ -50,6 +51,7 @@ function setup() { crypto, codec: 
new JsonCodec(), chunkSize: 1024, + observability: new SilentObserver(), }); return { crypto, blobStore, mockPersistence, service }; diff --git a/test/unit/domain/services/CasService.crypto.test.js b/test/unit/domain/services/CasService.crypto.test.js index e4ca2df1..2930f1f1 100644 --- a/test/unit/domain/services/CasService.crypto.test.js +++ b/test/unit/domain/services/CasService.crypto.test.js @@ -3,6 +3,7 @@ import { randomBytes } from 'node:crypto'; import CasService from '../../../../src/domain/services/CasService.js'; import NodeCryptoAdapter from '../../../../src/infrastructure/adapters/NodeCryptoAdapter.js'; import JsonCodec from '../../../../src/infrastructure/codecs/JsonCodec.js'; +import SilentObserver from '../../../../src/infrastructure/adapters/SilentObserver.js'; // --------------------------------------------------------------------------- // 1. Round-trip golden path @@ -22,6 +23,7 @@ describe('CasService encryption – round-trip golden path', () => { crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), chunkSize: 1024, + observability: new SilentObserver(), }); }); @@ -74,6 +76,7 @@ describe('CasService encryption – wrong key and tampered ciphertext', () => { crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), chunkSize: 1024, + observability: new SilentObserver(), }); }); @@ -122,6 +125,7 @@ describe('CasService encryption – tampered auth tag', () => { crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), chunkSize: 1024, + observability: new SilentObserver(), }); }); @@ -159,6 +163,7 @@ describe('CasService encryption – tampered nonce', () => { crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), chunkSize: 1024, + observability: new SilentObserver(), }); }); @@ -196,6 +201,7 @@ describe('CasService encryption – passthrough', () => { crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), chunkSize: 1024, + observability: new SilentObserver(), }); }); @@ -230,6 +236,7 @@ describe('CasService encryption – fuzz round-trip', () => 
{ crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), chunkSize: 1024, + observability: new SilentObserver(), }); }); @@ -269,6 +276,7 @@ describe('CasService encryption – fuzz tamper', () => { crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), chunkSize: 1024, + observability: new SilentObserver(), }); }); diff --git a/test/unit/domain/services/CasService.deleteAsset.test.js b/test/unit/domain/services/CasService.deleteAsset.test.js index 4fb035c0..d01120b1 100644 --- a/test/unit/domain/services/CasService.deleteAsset.test.js +++ b/test/unit/domain/services/CasService.deleteAsset.test.js @@ -4,6 +4,7 @@ import CasService from '../../../../src/domain/services/CasService.js'; import NodeCryptoAdapter from '../../../../src/infrastructure/adapters/NodeCryptoAdapter.js'; import JsonCodec from '../../../../src/infrastructure/codecs/JsonCodec.js'; import CasError from '../../../../src/domain/errors/CasError.js'; +import SilentObserver from '../../../../src/infrastructure/adapters/SilentObserver.js'; /** * Helper to create deterministic 64-char SHA-256 digests for test data. @@ -28,6 +29,7 @@ function setup() { crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), chunkSize: 1024, + observability: new SilentObserver(), }); return { mockPersistence, service }; diff --git a/test/unit/domain/services/CasService.empty-file.test.js b/test/unit/domain/services/CasService.empty-file.test.js index a52c32bb..3e778504 100644 --- a/test/unit/domain/services/CasService.empty-file.test.js +++ b/test/unit/domain/services/CasService.empty-file.test.js @@ -6,6 +6,7 @@ import os from 'node:os'; import CasService from '../../../../src/domain/services/CasService.js'; import NodeCryptoAdapter from '../../../../src/infrastructure/adapters/NodeCryptoAdapter.js'; import JsonCodec from '../../../../src/infrastructure/codecs/JsonCodec.js'; +import SilentObserver from '../../../../src/infrastructure/adapters/SilentObserver.js'; /** * Helper: writes a 0-byte file and returns its path. 
@@ -30,6 +31,7 @@ function setup() { crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), chunkSize: 1024, + observability: new SilentObserver(), }); const tempDir = mkdtempSync(path.join(os.tmpdir(), 'cas-empty-')); return { mockPersistence, service, tempDir }; diff --git a/test/unit/domain/services/CasService.errors.test.js b/test/unit/domain/services/CasService.errors.test.js index 19affaef..2a284a45 100644 --- a/test/unit/domain/services/CasService.errors.test.js +++ b/test/unit/domain/services/CasService.errors.test.js @@ -5,6 +5,7 @@ import CasService from '../../../../src/domain/services/CasService.js'; import NodeCryptoAdapter from '../../../../src/infrastructure/adapters/NodeCryptoAdapter.js'; import JsonCodec from '../../../../src/infrastructure/codecs/JsonCodec.js'; import Manifest from '../../../../src/domain/value-objects/Manifest.js'; +import SilentObserver from '../../../../src/infrastructure/adapters/SilentObserver.js'; /** Deterministic SHA-256 hex digest for a given string. 
*/ const sha256 = (str) => createHash('sha256').update(str).digest('hex'); @@ -22,13 +23,13 @@ describe('CasService – constructor – chunkSize validation', () => { it('throws when chunkSize is 0', () => { expect( - () => new CasService({ persistence: mockPersistence, crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), chunkSize: 0 }), + () => new CasService({ persistence: mockPersistence, crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), chunkSize: 0, observability: new SilentObserver() }), ).toThrow('Chunk size must be at least 1024 bytes'); }); it('throws when chunkSize is 512', () => { expect( - () => new CasService({ persistence: mockPersistence, crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), chunkSize: 512 }), + () => new CasService({ persistence: mockPersistence, crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), chunkSize: 512, observability: new SilentObserver() }), ).toThrow('Chunk size must be at least 1024 bytes'); }); @@ -38,6 +39,7 @@ describe('CasService – constructor – chunkSize validation', () => { crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), chunkSize: 1024, + observability: new SilentObserver(), }); expect(service.chunkSize).toBe(1024); }); @@ -56,6 +58,7 @@ describe('CasService – store – mutual exclusion and validation', () => { crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), chunkSize: 1024, + observability: new SilentObserver(), }); }); @@ -96,6 +99,7 @@ describe('CasService – restore – mutual exclusion', () => { crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), chunkSize: 1024, + observability: new SilentObserver(), }); }); @@ -140,6 +144,7 @@ describe('CasService – store', () => { crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), chunkSize: 1024, + observability: new SilentObserver(), }); await expect( @@ -177,6 +182,7 @@ describe('CasService – verifyIntegrity', () => { crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), chunkSize: 1024, + observability: new SilentObserver(), 
}); const manifest = new Manifest({ @@ -215,6 +221,7 @@ describe('CasService – createTree', () => { crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), chunkSize: 1024, + observability: new SilentObserver(), }); // A plain object that lacks .toJSON() and .chunks @@ -229,6 +236,7 @@ describe('CasService – createTree', () => { crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), chunkSize: 1024, + observability: new SilentObserver(), }); const badManifest = { toJSON: 'not-a-function', chunks: [] }; diff --git a/test/unit/domain/services/CasService.events.test.js b/test/unit/domain/services/CasService.events.test.js index 021845ac..7de6e54e 100644 --- a/test/unit/domain/services/CasService.events.test.js +++ b/test/unit/domain/services/CasService.events.test.js @@ -3,11 +3,14 @@ import { randomBytes } from 'node:crypto'; import CasService from '../../../../src/domain/services/CasService.js'; import NodeCryptoAdapter from '../../../../src/infrastructure/adapters/NodeCryptoAdapter.js'; import JsonCodec from '../../../../src/infrastructure/codecs/JsonCodec.js'; +import EventEmitterObserver from '../../../../src/infrastructure/adapters/EventEmitterObserver.js'; +import SilentObserver from '../../../../src/infrastructure/adapters/SilentObserver.js'; import CasError from '../../../../src/domain/errors/CasError.js'; function setup() { const crypto = new NodeCryptoAdapter(); const blobStore = new Map(); + const observer = new EventEmitterObserver(); const mockPersistence = { writeBlob: vi.fn().mockImplementation(async (content) => { @@ -28,10 +31,11 @@ function setup() { persistence: mockPersistence, crypto, codec: new JsonCodec(), + observability: observer, chunkSize: 1024, }); - return { crypto, blobStore, mockPersistence, service }; + return { crypto, blobStore, mockPersistence, service, observer }; } async function storeBuffer(svc, buf, opts = {}) { @@ -46,9 +50,9 @@ async function storeBuffer(svc, buf, opts = {}) { describe('CasService events – chunk:stored', () 
=> { it('emits chunk:stored per chunk with correct payload', async () => { - const { service } = setup(); + const { service, observer } = setup(); const onChunkStored = vi.fn(); - service.on('chunk:stored', onChunkStored); + observer.on('chunk:stored', onChunkStored); await storeBuffer(service, randomBytes(2048)); @@ -64,9 +68,9 @@ describe('CasService events – chunk:stored', () => { describe('CasService events – file:stored', () => { it('emits file:stored once with correct payload', async () => { - const { service } = setup(); + const { service, observer } = setup(); const onFileStored = vi.fn(); - service.on('file:stored', onFileStored); + observer.on('file:stored', onFileStored); await storeBuffer(service, randomBytes(2048)); @@ -77,9 +81,9 @@ describe('CasService events – file:stored', () => { }); it('emits encrypted=true when encryption used', async () => { - const { service } = setup(); + const { service, observer } = setup(); const onFileStored = vi.fn(); - service.on('file:stored', onFileStored); + observer.on('file:stored', onFileStored); await storeBuffer(service, randomBytes(1024), { encryptionKey: randomBytes(32) }); @@ -89,11 +93,11 @@ describe('CasService events – file:stored', () => { describe('CasService events – chunk:restored', () => { it('emits chunk:restored per chunk with correct payload', async () => { - const { service } = setup(); + const { service, observer } = setup(); const manifest = await storeBuffer(service, randomBytes(2048)); const onChunkRestored = vi.fn(); - service.on('chunk:restored', onChunkRestored); + observer.on('chunk:restored', onChunkRestored); await service.restore({ manifest }); expect(onChunkRestored).toHaveBeenCalledTimes(2); @@ -108,11 +112,11 @@ describe('CasService events – chunk:restored', () => { describe('CasService events – file:restored', () => { it('emits file:restored once with correct payload', async () => { - const { service } = setup(); + const { service, observer } = setup(); const manifest = await 
storeBuffer(service, randomBytes(2048)); const onFileRestored = vi.fn(); - service.on('file:restored', onFileRestored); + observer.on('file:restored', onFileRestored); await service.restore({ manifest }); expect(onFileRestored).toHaveBeenCalledTimes(1); @@ -124,11 +128,11 @@ describe('CasService events – file:restored', () => { describe('CasService events – integrity:pass', () => { it('emits integrity:pass on successful verification', async () => { - const { service } = setup(); + const { service, observer } = setup(); const manifest = await storeBuffer(service, randomBytes(2048)); const onPass = vi.fn(); - service.on('integrity:pass', onPass); + observer.on('integrity:pass', onPass); await service.verifyIntegrity(manifest); expect(onPass).toHaveBeenCalledTimes(1); @@ -138,13 +142,13 @@ describe('CasService events – integrity:pass', () => { describe('CasService events – integrity:fail', () => { it('emits integrity:fail on chunk mismatch', async () => { - const { service, blobStore } = setup(); + const { service, observer, blobStore } = setup(); const manifest = await storeBuffer(service, randomBytes(2048)); blobStore.set(manifest.chunks[0].blob, Buffer.from('corrupted')); const onFail = vi.fn(); - service.on('integrity:fail', onFail); + observer.on('integrity:fail', onFail); await service.verifyIntegrity(manifest); expect(onFail).toHaveBeenCalledTimes(1); @@ -156,13 +160,13 @@ describe('CasService events – integrity:fail', () => { describe('CasService events – error on restore integrity failure', () => { it('emits error event on integrity failure during restore', async () => { - const { service, blobStore } = setup(); + const { service, observer, blobStore } = setup(); const manifest = await storeBuffer(service, randomBytes(1024)); blobStore.set(manifest.chunks[0].blob, Buffer.from('corrupted')); const onError = vi.fn(); - service.on('error', onError); + observer.on('error', onError); await expect(service.restore({ manifest })).rejects.toThrow(CasError); @@ -173,20 
+177,43 @@ describe('CasService events – error on restore integrity failure', () => { }); }); -describe('CasService events – no listeners attached', () => { - it('store succeeds without listeners', async () => { - const { service } = setup(); +function setupSilent() { + const crypto = new NodeCryptoAdapter(); + const blobStore = new Map(); + const mockPersistence = { + writeBlob: vi.fn().mockImplementation(async (content) => { + const buf = Buffer.isBuffer(content) ? content : Buffer.from(content); + const oid = await crypto.sha256(buf); + blobStore.set(oid, buf); + return oid; + }), + writeTree: vi.fn().mockResolvedValue('mock-tree-oid'), + readBlob: vi.fn().mockImplementation(async (oid) => { + const buf = blobStore.get(oid); + if (!buf) { throw new Error(`Blob not found: ${oid}`); } + return buf; + }), + }; + return new CasService({ + persistence: mockPersistence, crypto, codec: new JsonCodec(), + observability: new SilentObserver(), chunkSize: 1024, + }); +} + +describe('CasService events – no listeners attached (SilentObserver)', () => { + it('store succeeds with SilentObserver', async () => { + const service = setupSilent(); await expect(storeBuffer(service, randomBytes(2048))).resolves.toBeDefined(); }); - it('restore succeeds without listeners', async () => { - const { service } = setup(); + it('restore succeeds with SilentObserver', async () => { + const service = setupSilent(); const manifest = await storeBuffer(service, randomBytes(1024)); await expect(service.restore({ manifest })).resolves.toBeDefined(); }); - it('verifyIntegrity succeeds without listeners', async () => { - const { service } = setup(); + it('verifyIntegrity succeeds with SilentObserver', async () => { + const service = setupSilent(); const manifest = await storeBuffer(service, randomBytes(1024)); await expect(service.verifyIntegrity(manifest)).resolves.toBe(true); }); @@ -194,26 +221,26 @@ describe('CasService events – no listeners attached', () => { describe('CasService events – event 
count verification', () => { it('emits 3 chunk:stored for 3-chunk file', async () => { - const { service } = setup(); + const { service, observer } = setup(); const listener = vi.fn(); - service.on('chunk:stored', listener); + observer.on('chunk:stored', listener); await storeBuffer(service, randomBytes(3072)); expect(listener).toHaveBeenCalledTimes(3); }); it('emits 3 chunk:restored for 3-chunk file', async () => { - const { service } = setup(); + const { service, observer } = setup(); const manifest = await storeBuffer(service, randomBytes(3072)); const listener = vi.fn(); - service.on('chunk:restored', listener); + observer.on('chunk:restored', listener); await service.restore({ manifest }); expect(listener).toHaveBeenCalledTimes(3); }); it('emits 1 chunk:stored for sub-chunk file', async () => { - const { service } = setup(); + const { service, observer } = setup(); const listener = vi.fn(); - service.on('chunk:stored', listener); + observer.on('chunk:stored', listener); await storeBuffer(service, randomBytes(512)); expect(listener).toHaveBeenCalledTimes(1); }); diff --git a/test/unit/domain/services/CasService.findOrphanedChunks.test.js b/test/unit/domain/services/CasService.findOrphanedChunks.test.js index 138c5339..761c7e3c 100644 --- a/test/unit/domain/services/CasService.findOrphanedChunks.test.js +++ b/test/unit/domain/services/CasService.findOrphanedChunks.test.js @@ -3,6 +3,7 @@ import CasService from '../../../../src/domain/services/CasService.js'; import NodeCryptoAdapter from '../../../../src/infrastructure/adapters/NodeCryptoAdapter.js'; import JsonCodec from '../../../../src/infrastructure/codecs/JsonCodec.js'; import CasError from '../../../../src/domain/errors/CasError.js'; +import SilentObserver from '../../../../src/infrastructure/adapters/SilentObserver.js'; import { digestOf } from '../../../helpers/crypto.js'; /** @@ -20,6 +21,7 @@ function setup() { crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), chunkSize: 1024, + observability: 
new SilentObserver(), }); return { mockPersistence, service }; } diff --git a/test/unit/domain/services/CasService.kdf.test.js b/test/unit/domain/services/CasService.kdf.test.js index 31d967dc..e5b2dc3c 100644 --- a/test/unit/domain/services/CasService.kdf.test.js +++ b/test/unit/domain/services/CasService.kdf.test.js @@ -4,6 +4,7 @@ import CasService from '../../../../src/domain/services/CasService.js'; import NodeCryptoAdapter from '../../../../src/infrastructure/adapters/NodeCryptoAdapter.js'; import JsonCodec from '../../../../src/infrastructure/codecs/JsonCodec.js'; import CasError from '../../../../src/domain/errors/CasError.js'; +import SilentObserver from '../../../../src/infrastructure/adapters/SilentObserver.js'; // --------------------------------------------------------------------------- // Helpers @@ -38,6 +39,7 @@ function setup() { crypto, codec: new JsonCodec(), chunkSize: 1024, + observability: new SilentObserver(), }); return { mockPersistence, service, blobs, crypto }; } diff --git a/test/unit/domain/services/CasService.key-validation.test.js b/test/unit/domain/services/CasService.key-validation.test.js index a87ba8fb..7f622abf 100644 --- a/test/unit/domain/services/CasService.key-validation.test.js +++ b/test/unit/domain/services/CasService.key-validation.test.js @@ -7,6 +7,7 @@ import CasService from '../../../../src/domain/services/CasService.js'; import NodeCryptoAdapter from '../../../../src/infrastructure/adapters/NodeCryptoAdapter.js'; import JsonCodec from '../../../../src/infrastructure/codecs/JsonCodec.js'; import CasError from '../../../../src/domain/errors/CasError.js'; +import SilentObserver from '../../../../src/infrastructure/adapters/SilentObserver.js'; function createService(mockPersistence) { return new CasService({ @@ -14,6 +15,7 @@ function createService(mockPersistence) { crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), chunkSize: 1024, + observability: new SilentObserver(), }); } diff --git 
a/test/unit/domain/services/CasService.merkle.test.js b/test/unit/domain/services/CasService.merkle.test.js index 22ef71a4..26d11884 100644 --- a/test/unit/domain/services/CasService.merkle.test.js +++ b/test/unit/domain/services/CasService.merkle.test.js @@ -4,6 +4,7 @@ import CasService from '../../../../src/domain/services/CasService.js'; import NodeCryptoAdapter from '../../../../src/infrastructure/adapters/NodeCryptoAdapter.js'; import JsonCodec from '../../../../src/infrastructure/codecs/JsonCodec.js'; import Manifest from '../../../../src/domain/value-objects/Manifest.js'; +import SilentObserver from '../../../../src/infrastructure/adapters/SilentObserver.js'; // --------------------------------------------------------------------------- // Helpers @@ -57,6 +58,7 @@ function setup(merkleThreshold = 5) { codec, chunkSize: 1024, merkleThreshold, + observability: new SilentObserver(), }); return { mockPersistence, service, blobs, trees, crypto, codec }; } diff --git a/test/unit/domain/services/CasService.parallel.test.js b/test/unit/domain/services/CasService.parallel.test.js new file mode 100644 index 00000000..57a24a2c --- /dev/null +++ b/test/unit/domain/services/CasService.parallel.test.js @@ -0,0 +1,116 @@ +import { describe, it, expect, vi } from 'vitest'; +import { randomBytes } from 'node:crypto'; +import CasService from '../../../../src/domain/services/CasService.js'; +import NodeCryptoAdapter from '../../../../src/infrastructure/adapters/NodeCryptoAdapter.js'; +import JsonCodec from '../../../../src/infrastructure/codecs/JsonCodec.js'; +import SilentObserver from '../../../../src/infrastructure/adapters/SilentObserver.js'; + +function setup(concurrency = 1) { + const crypto = new NodeCryptoAdapter(); + const blobStore = new Map(); + + const mockPersistence = { + writeBlob: vi.fn().mockImplementation(async (content) => { + const buf = Buffer.isBuffer(content) ? 
content : Buffer.from(content); + const oid = await crypto.sha256(buf); + blobStore.set(oid, buf); + return oid; + }), + writeTree: vi.fn().mockResolvedValue('mock-tree-oid'), + readBlob: vi.fn().mockImplementation(async (oid) => { + const buf = blobStore.get(oid); + if (!buf) { throw new Error(`Blob not found: ${oid}`); } + return buf; + }), + }; + + const service = new CasService({ + persistence: mockPersistence, + crypto, + codec: new JsonCodec(), + observability: new SilentObserver(), + chunkSize: 1024, + concurrency, + }); + + return { crypto, blobStore, mockPersistence, service }; +} + +async function storeBuffer(svc, buf, opts = {}) { + async function* source() { yield buf; } + return svc.store({ + source: source(), + slug: opts.slug || 'test', + filename: opts.filename || 'test.bin', + encryptionKey: opts.encryptionKey, + compression: opts.compression, + }); +} + +describe('Parallel I/O – sequential baseline', () => { + it('concurrency: 1 — round-trip', async () => { + const { service } = setup(1); + const original = randomBytes(4096); + const manifest = await storeBuffer(service, original); + const { buffer } = await service.restore({ manifest }); + expect(buffer.equals(original)).toBe(true); + }); +}); + +describe('Parallel I/O – concurrent store+restore', () => { + it('concurrency: 4 — byte-identical round-trip', async () => { + const { service } = setup(4); + const original = randomBytes(8192); + const manifest = await storeBuffer(service, original); + const { buffer } = await service.restore({ manifest }); + expect(buffer.equals(original)).toBe(true); + }); + + it('concurrency: 4 — chunks are in order', async () => { + const { service } = setup(4); + const manifest = await storeBuffer(service, randomBytes(4096)); + for (let i = 0; i < manifest.chunks.length; i++) { + expect(manifest.chunks[i].index).toBe(i); + } + }); + + it('concurrency: 4 — restoreStream correct', async () => { + const { service } = setup(4); + const original = randomBytes(4096); + 
const manifest = await storeBuffer(service, original); + const chunks = []; + for await (const c of service.restoreStream({ manifest })) { chunks.push(c); } + expect(Buffer.concat(chunks).equals(original)).toBe(true); + }); +}); + +describe('Parallel I/O – encrypted + compressed', () => { + it('concurrency: 4 with encryption + compression', async () => { + const { service } = setup(4); + const original = Buffer.alloc(4096, 'X'); + const key = randomBytes(32); + const manifest = await storeBuffer(service, original, { + encryptionKey: key, compression: { algorithm: 'gzip' }, + }); + const { buffer } = await service.restore({ manifest, encryptionKey: key }); + expect(buffer.equals(original)).toBe(true); + }); + + it('1-chunk file with concurrency: 10', async () => { + const { service } = setup(10); + const original = randomBytes(512); + const manifest = await storeBuffer(service, original); + const { buffer } = await service.restore({ manifest }); + expect(buffer.equals(original)).toBe(true); + }); +}); + +describe('Parallel I/O – validation', () => { + it('invalid concurrency: 0 throws', () => { + expect(() => setup(0)).toThrow('Concurrency must be a positive integer'); + }); + + it('invalid concurrency: -1 throws', () => { + expect(() => setup(-1)).toThrow('Concurrency must be a positive integer'); + }); +}); diff --git a/test/unit/domain/services/CasService.readManifest.test.js b/test/unit/domain/services/CasService.readManifest.test.js index ec6d9b5d..f498b13c 100644 --- a/test/unit/domain/services/CasService.readManifest.test.js +++ b/test/unit/domain/services/CasService.readManifest.test.js @@ -5,6 +5,7 @@ import NodeCryptoAdapter from '../../../../src/infrastructure/adapters/NodeCrypt import JsonCodec from '../../../../src/infrastructure/codecs/JsonCodec.js'; import Manifest from '../../../../src/domain/value-objects/Manifest.js'; import CasError from '../../../../src/domain/errors/CasError.js'; +import SilentObserver from 
'../../../../src/infrastructure/adapters/SilentObserver.js'; function digestOf(seed) { return createHash('sha256').update(seed).digest('hex'); @@ -38,6 +39,7 @@ function setup() { crypto: new NodeCryptoAdapter(), codec, chunkSize: 1024, + observability: new SilentObserver(), }); return { service, mockPersistence, codec }; diff --git a/test/unit/domain/services/CasService.restore.test.js b/test/unit/domain/services/CasService.restore.test.js index b217de4b..134daab4 100644 --- a/test/unit/domain/services/CasService.restore.test.js +++ b/test/unit/domain/services/CasService.restore.test.js @@ -5,6 +5,7 @@ import NodeCryptoAdapter from '../../../../src/infrastructure/adapters/NodeCrypt import JsonCodec from '../../../../src/infrastructure/codecs/JsonCodec.js'; import Manifest from '../../../../src/domain/value-objects/Manifest.js'; import CasError from '../../../../src/domain/errors/CasError.js'; +import SilentObserver from '../../../../src/infrastructure/adapters/SilentObserver.js'; // --------------------------------------------------------------------------- // Module-level helper: store content via async iterable, return manifest @@ -47,6 +48,7 @@ function setup() { crypto, codec: new JsonCodec(), chunkSize: 1024, + observability: new SilentObserver(), }); return { crypto, blobStore, mockPersistence, service }; diff --git a/test/unit/domain/services/CasService.restoreStream.test.js b/test/unit/domain/services/CasService.restoreStream.test.js new file mode 100644 index 00000000..de7f9fb2 --- /dev/null +++ b/test/unit/domain/services/CasService.restoreStream.test.js @@ -0,0 +1,155 @@ +import { describe, it, expect, vi } from 'vitest'; +import { randomBytes } from 'node:crypto'; +import CasService from '../../../../src/domain/services/CasService.js'; +import NodeCryptoAdapter from '../../../../src/infrastructure/adapters/NodeCryptoAdapter.js'; +import JsonCodec from '../../../../src/infrastructure/codecs/JsonCodec.js'; +import SilentObserver from 
'../../../../src/infrastructure/adapters/SilentObserver.js'; +import EventEmitterObserver from '../../../../src/infrastructure/adapters/EventEmitterObserver.js'; + +function setup(opts = {}) { + const crypto = new NodeCryptoAdapter(); + const blobStore = new Map(); + + const mockPersistence = { + writeBlob: vi.fn().mockImplementation(async (content) => { + const buf = Buffer.isBuffer(content) ? content : Buffer.from(content); + const oid = await crypto.sha256(buf); + blobStore.set(oid, buf); + return oid; + }), + writeTree: vi.fn().mockResolvedValue('mock-tree-oid'), + readBlob: vi.fn().mockImplementation(async (oid) => { + const buf = blobStore.get(oid); + if (!buf) { throw new Error(`Blob not found: ${oid}`); } + return buf; + }), + }; + + const service = new CasService({ + persistence: mockPersistence, + crypto, + codec: new JsonCodec(), + observability: opts.observability || new SilentObserver(), + chunkSize: 1024, + }); + + return { crypto, blobStore, mockPersistence, service }; +} + +async function storeBuffer(svc, buf, opts = {}) { + async function* source() { yield buf; } + return svc.store({ + source: source(), + slug: opts.slug || 'test', + filename: opts.filename || 'test.bin', + encryptionKey: opts.encryptionKey, + compression: opts.compression, + }); +} + +async function collectStream(iterable) { + const chunks = []; + for await (const chunk of iterable) { chunks.push(chunk); } + return Buffer.concat(chunks); +} + +describe('restoreStream – plaintext round-trips', () => { + it('store → restoreStream → byte-compare', async () => { + const { service } = setup(); + const original = randomBytes(3072); + const manifest = await storeBuffer(service, original); + const restored = await collectStream(service.restoreStream({ manifest })); + expect(restored.equals(original)).toBe(true); + }); + + it('handles 0-byte file', async () => { + const { service } = setup(); + const manifest = await storeBuffer(service, Buffer.alloc(0)); + const restored = await 
collectStream(service.restoreStream({ manifest })); + expect(restored.length).toBe(0); + }); + + it('handles 1-chunk file', async () => { + const { service } = setup(); + const original = randomBytes(512); + const manifest = await storeBuffer(service, original); + const restored = await collectStream(service.restoreStream({ manifest })); + expect(restored.equals(original)).toBe(true); + }); + + it('handles exact-multiple chunk file', async () => { + const { service } = setup(); + const original = randomBytes(2048); + const manifest = await storeBuffer(service, original); + const restored = await collectStream(service.restoreStream({ manifest })); + expect(restored.equals(original)).toBe(true); + }); +}); + +describe('restoreStream – encrypted / compressed', () => { + it('round-trips encrypted file', async () => { + const { service } = setup(); + const original = randomBytes(3072); + const key = randomBytes(32); + const manifest = await storeBuffer(service, original, { encryptionKey: key }); + const restored = await collectStream(service.restoreStream({ manifest, encryptionKey: key })); + expect(restored.equals(original)).toBe(true); + }); + + it('round-trips compressed file', async () => { + const { service } = setup(); + const original = Buffer.alloc(4096, 'A'); + const manifest = await storeBuffer(service, original, { compression: { algorithm: 'gzip' } }); + const restored = await collectStream(service.restoreStream({ manifest })); + expect(restored.equals(original)).toBe(true); + }); + + it('round-trips encrypted + compressed file', async () => { + const { service } = setup(); + const original = Buffer.alloc(4096, 'B'); + const key = randomBytes(32); + const manifest = await storeBuffer(service, original, { + encryptionKey: key, compression: { algorithm: 'gzip' }, + }); + const restored = await collectStream(service.restoreStream({ manifest, encryptionKey: key })); + expect(restored.equals(original)).toBe(true); + }); +}); + +describe('restoreStream – 
observability events', () => { + it('emits chunk:restored events for unencrypted path', async () => { + const observer = new EventEmitterObserver(); + const { service } = setup({ observability: observer }); + const original = randomBytes(2048); + const manifest = await storeBuffer(service, original); + const handler = vi.fn(); + observer.on('chunk:restored', handler); + await collectStream(service.restoreStream({ manifest })); + expect(handler).toHaveBeenCalledTimes(2); + }); + + it('emits file:restored event', async () => { + const observer = new EventEmitterObserver(); + const { service } = setup({ observability: observer }); + const original = randomBytes(2048); + const manifest = await storeBuffer(service, original); + const handler = vi.fn(); + observer.on('file:restored', handler); + await collectStream(service.restoreStream({ manifest })); + expect(handler).toHaveBeenCalledTimes(1); + expect(handler).toHaveBeenCalledWith(expect.objectContaining({ + slug: 'test', size: 2048, chunkCount: 2, + })); + }); +}); + +describe('restoreStream – consistency with restore()', () => { + it('returns same result as restore() collected', async () => { + const { service } = setup(); + const original = randomBytes(3072); + const manifest = await storeBuffer(service, original); + const { buffer } = await service.restore({ manifest }); + const streamed = await collectStream(service.restoreStream({ manifest })); + expect(buffer.equals(streamed)).toBe(true); + }); +}); diff --git a/test/unit/domain/services/CasService.stream-error.test.js b/test/unit/domain/services/CasService.stream-error.test.js index fa7d900c..84608d90 100644 --- a/test/unit/domain/services/CasService.stream-error.test.js +++ b/test/unit/domain/services/CasService.stream-error.test.js @@ -3,6 +3,7 @@ import CasService from '../../../../src/domain/services/CasService.js'; import NodeCryptoAdapter from '../../../../src/infrastructure/adapters/NodeCryptoAdapter.js'; import JsonCodec from 
'../../../../src/infrastructure/codecs/JsonCodec.js'; import CasError from '../../../../src/domain/errors/CasError.js'; +import SilentObserver from '../../../../src/infrastructure/adapters/SilentObserver.js'; /** * Creates an async iterable that yields `n` chunks of `chunkSize` bytes @@ -39,6 +40,7 @@ function setup() { crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), chunkSize: 1024, + observability: new SilentObserver(), }); return { mockPersistence, service }; } diff --git a/test/unit/domain/services/CasService.test.js b/test/unit/domain/services/CasService.test.js index 0669e0f1..508d3d84 100644 --- a/test/unit/domain/services/CasService.test.js +++ b/test/unit/domain/services/CasService.test.js @@ -6,6 +6,7 @@ import CasService from '../../../../src/domain/services/CasService.js'; import NodeCryptoAdapter from '../../../../src/infrastructure/adapters/NodeCryptoAdapter.js'; import JsonCodec from '../../../../src/infrastructure/codecs/JsonCodec.js'; import Manifest from '../../../../src/domain/value-objects/Manifest.js'; +import SilentObserver from '../../../../src/infrastructure/adapters/SilentObserver.js'; import { digestOf } from '../../../helpers/crypto.js'; /** @@ -22,6 +23,7 @@ function setup() { crypto: new NodeCryptoAdapter(), codec: new JsonCodec(), chunkSize: 1024, + observability: new SilentObserver(), }); return { mockPersistence, service }; } diff --git a/test/unit/domain/services/Semaphore.test.js b/test/unit/domain/services/Semaphore.test.js new file mode 100644 index 00000000..72194d27 --- /dev/null +++ b/test/unit/domain/services/Semaphore.test.js @@ -0,0 +1,58 @@ +import { describe, it, expect } from 'vitest'; +import Semaphore from '../../../../src/domain/services/Semaphore.js'; + +describe('Semaphore – concurrency limiting', () => { + it('allows up to N concurrent acquires', async () => { + const sem = new Semaphore(2); + await sem.acquire(); + await sem.acquire(); + let resolved = false; + const p = sem.acquire().then(() => { 
resolved = true; }); + await Promise.resolve(); + expect(resolved).toBe(false); + sem.release(); + await p; + expect(resolved).toBe(true); + }); + + it('release unblocks waiting acquires in order', async () => { + const sem = new Semaphore(1); + await sem.acquire(); + const order = []; + const p1 = sem.acquire().then(() => order.push(1)); + const p2 = sem.acquire().then(() => order.push(2)); + sem.release(); + await p1; + sem.release(); + await p2; + expect(order).toEqual([1, 2]); + }); + + it('concurrency 1 serializes operations', async () => { + const sem = new Semaphore(1); + const order = []; + const task = async (id) => { + await sem.acquire(); + order.push(`start-${id}`); + await new Promise((r) => setTimeout(r, 5)); + order.push(`end-${id}`); + sem.release(); + }; + await Promise.all([task('a'), task('b')]); + expect(order).toEqual(['start-a', 'end-a', 'start-b', 'end-b']); + }); +}); + +describe('Semaphore – validation', () => { + it('throws on concurrency: 0', () => { + expect(() => new Semaphore(0)).toThrow(); + }); + + it('throws on concurrency: -1', () => { + expect(() => new Semaphore(-1)).toThrow(); + }); + + it('throws on concurrency: 1.5', () => { + expect(() => new Semaphore(1.5)).toThrow(); + }); +}); diff --git a/test/unit/infrastructure/adapters/EventEmitterObserver.test.js b/test/unit/infrastructure/adapters/EventEmitterObserver.test.js new file mode 100644 index 00000000..0d564802 --- /dev/null +++ b/test/unit/infrastructure/adapters/EventEmitterObserver.test.js @@ -0,0 +1,73 @@ +import { describe, it, expect, vi } from 'vitest'; +import EventEmitterObserver from '../../../../src/infrastructure/adapters/EventEmitterObserver.js'; + +describe('EventEmitterObserver – metric routing', () => { + it('routes chunk:stored metric to event', () => { + const obs = new EventEmitterObserver(); + const handler = vi.fn(); + obs.on('chunk:stored', handler); + obs.metric('chunk', { action: 'stored', index: 0, size: 1024, digest: 'abc', blob: 'def' }); + 
expect(handler).toHaveBeenCalledWith({ index: 0, size: 1024, digest: 'abc', blob: 'def' }); + }); + + it('routes file:stored metric to event', () => { + const obs = new EventEmitterObserver(); + const handler = vi.fn(); + obs.on('file:stored', handler); + obs.metric('file', { action: 'stored', slug: 'test', size: 2048, chunkCount: 2, encrypted: false }); + expect(handler).toHaveBeenCalledWith({ slug: 'test', size: 2048, chunkCount: 2, encrypted: false }); + }); + + it('routes integrity:pass metric to event', () => { + const obs = new EventEmitterObserver(); + const handler = vi.fn(); + obs.on('integrity:pass', handler); + obs.metric('integrity', { action: 'pass', slug: 'test' }); + expect(handler).toHaveBeenCalledWith({ slug: 'test' }); + }); + + it('routes integrity:fail metric to event', () => { + const obs = new EventEmitterObserver(); + const handler = vi.fn(); + obs.on('integrity:fail', handler); + obs.metric('integrity', { action: 'fail', slug: 'test', chunkIndex: 0, expected: 'a', actual: 'b' }); + expect(handler).toHaveBeenCalledWith({ slug: 'test', chunkIndex: 0, expected: 'a', actual: 'b' }); + }); +}); + +describe('EventEmitterObserver – error handling', () => { + it('emits error only when listeners are attached', () => { + const obs = new EventEmitterObserver(); + expect(() => obs.metric('error', { code: 'ERR', message: 'fail' })).not.toThrow(); + const handler = vi.fn(); + obs.on('error', handler); + obs.metric('error', { code: 'ERR', message: 'fail' }); + expect(handler).toHaveBeenCalledWith({ code: 'ERR', message: 'fail' }); + }); +}); + +describe('EventEmitterObserver – listener management', () => { + it('removeListener removes the listener', () => { + const obs = new EventEmitterObserver(); + const handler = vi.fn(); + obs.on('chunk:stored', handler); + obs.removeListener('chunk:stored', handler); + obs.metric('chunk', { action: 'stored', index: 0, size: 100 }); + expect(handler).not.toHaveBeenCalled(); + }); + + it('listenerCount returns correct 
count', () => { + const obs = new EventEmitterObserver(); + expect(obs.listenerCount('chunk:stored')).toBe(0); + const handler = vi.fn(); + obs.on('chunk:stored', handler); + expect(obs.listenerCount('chunk:stored')).toBe(1); + }); + + it('log() and span() do not throw', () => { + const obs = new EventEmitterObserver(); + expect(() => obs.log('info', 'test')).not.toThrow(); + const s = obs.span('op'); + expect(() => s.end()).not.toThrow(); + }); +}); diff --git a/test/unit/infrastructure/adapters/SilentObserver.test.js b/test/unit/infrastructure/adapters/SilentObserver.test.js new file mode 100644 index 00000000..9c86ad13 --- /dev/null +++ b/test/unit/infrastructure/adapters/SilentObserver.test.js @@ -0,0 +1,21 @@ +import { describe, it, expect } from 'vitest'; +import SilentObserver from '../../../../src/infrastructure/adapters/SilentObserver.js'; + +describe('SilentObserver', () => { + it('metric() does not throw', () => { + const obs = new SilentObserver(); + expect(() => obs.metric('chunk', { action: 'stored' })).not.toThrow(); + }); + + it('log() does not throw', () => { + const obs = new SilentObserver(); + expect(() => obs.log('info', 'hello')).not.toThrow(); + }); + + it('span() returns object with end()', () => { + const obs = new SilentObserver(); + const s = obs.span('op'); + expect(s).toHaveProperty('end'); + expect(() => s.end()).not.toThrow(); + }); +}); diff --git a/test/unit/infrastructure/adapters/StatsCollector.test.js b/test/unit/infrastructure/adapters/StatsCollector.test.js new file mode 100644 index 00000000..494914d8 --- /dev/null +++ b/test/unit/infrastructure/adapters/StatsCollector.test.js @@ -0,0 +1,51 @@ +import { describe, it, expect } from 'vitest'; +import StatsCollector from '../../../../src/infrastructure/adapters/StatsCollector.js'; + +describe('StatsCollector', () => { + it('accumulates chunk metrics', () => { + const stats = new StatsCollector(); + stats.metric('chunk', { action: 'stored', size: 1024 }); + stats.metric('chunk', { 
action: 'stored', size: 2048 }); + + const s = stats.summary(); + expect(s.chunksProcessed).toBe(2); + expect(s.bytesTotal).toBe(3072); + }); + + it('counts errors', () => { + const stats = new StatsCollector(); + stats.metric('error', { code: 'ERR', message: 'fail' }); + + const s = stats.summary(); + expect(s.errors).toBe(1); + }); + + it('returns zero summary when no metrics emitted', () => { + const stats = new StatsCollector(); + const s = stats.summary(); + expect(s.chunksProcessed).toBe(0); + expect(s.bytesTotal).toBe(0); + expect(s.elapsed).toBe(0); + expect(s.throughput).toBe(0); + expect(s.errors).toBe(0); + }); + + it('calculates elapsed and throughput', async () => { + const stats = new StatsCollector(); + stats.metric('chunk', { action: 'stored', size: 1000 }); + // Small delay to ensure elapsed > 0 + await new Promise((r) => setTimeout(r, 10)); + stats.metric('chunk', { action: 'stored', size: 1000 }); + + const s = stats.summary(); + expect(s.elapsed).toBeGreaterThan(0); + expect(s.throughput).toBeGreaterThan(0); + }); + + it('log() and span() do not throw', () => { + const stats = new StatsCollector(); + expect(() => stats.log('info', 'test')).not.toThrow(); + const s = stats.span('op'); + expect(() => s.end()).not.toThrow(); + }); +}); diff --git a/test/unit/ports/ObservabilityPort.test.js b/test/unit/ports/ObservabilityPort.test.js new file mode 100644 index 00000000..49b17842 --- /dev/null +++ b/test/unit/ports/ObservabilityPort.test.js @@ -0,0 +1,19 @@ +import { describe, it, expect } from 'vitest'; +import ObservabilityPort from '../../../src/ports/ObservabilityPort.js'; + +describe('ObservabilityPort (abstract)', () => { + it('metric() throws Not implemented', () => { + const port = new ObservabilityPort(); + expect(() => port.metric('chunk', {})).toThrow('Not implemented'); + }); + + it('log() throws Not implemented', () => { + const port = new ObservabilityPort(); + expect(() => port.log('info', 'hello')).toThrow('Not implemented'); + }); + 
+ it('span() throws Not implemented', () => { + const port = new ObservabilityPort(); + expect(() => port.span('op')).toThrow('Not implemented'); + }); +}); From 11eed5fd6d05cea9333c1323b4e4d52ad7dd0bf2 Mon Sep 17 00:00:00 2001 From: James Ross Date: Fri, 27 Feb 2026 14:41:51 -0800 Subject: [PATCH 2/3] fix: M14 code review fixes + roadmap visions & concerns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Code review fixes (11 issues): - C1: drain in-flight writes with Promise.allSettled on stream error - M1: remove dead cas/plumbing variables in progress-tracking example - M2: fix stale JSDoc return types in progress.js - m1: merge duplicate node:fs imports in index.js - m2: document restoreStream yielded-chunk semantics - m3: rename chunksWritten → chunksDispatched in STREAM_ERROR - m5: add parallel stream-error test (concurrency: 4) - N1: rename N → readAhead in _restoreStreaming - N3: move eventName after error guard in EventEmitterObserver Roadmap additions: - 6 visions: snapshot trees, portable bundles, manifest diff, CompressionPort, watch mode, interactive passphrase prompt - 7 concerns with mitigations and defensive test specs: memory amplification, orphaned blobs, chunk size cap, Web Crypto buffering, passphrase exposure, KDF rate limiting, GCM nonces --- ROADMAP.md | 517 ++++++++++++++++++ bin/ui/progress.js | 4 +- examples/progress-tracking.js | 15 +- index.js | 3 +- src/domain/services/CasService.js | 18 +- .../adapters/EventEmitterObserver.js | 2 +- .../services/CasService.parallel.test.js | 36 ++ .../services/CasService.stream-error.test.js | 10 +- 8 files changed, 580 insertions(+), 25 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index 98b87d27..34066179 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -2328,3 +2328,520 @@ Multiple vaults instead of one. 
Refs move from `refs/cas/vault` to `refs/cas/vau ### Repo Intelligence - **Duplicate detection on store** — warn if a file being stored already exists as a tracked git blob (same content hash). "This file is already tracked by git — are you sure you want to store it in CAS too?" - **Repo scan / dedup advisor** — `git cas scan` walks the git object database and recommends files that could benefit from CAS (large blobs, binary files, duplicated content across branches). Reports dedup opportunities and potential storage savings. + +--- + +# Ideas & Visions + +New feature concepts with fully fleshed out visions and mini battle plans. Not committed to any milestone — captured here for future consideration and discussion. + +--- + +## Vision 1: Snapshot Trees — Directory-Level Store + +**The Pitch** + +Today, git-cas stores one file at a time. Storing a build output directory means N separate `storeFile()` calls, N separate vault entries, and the caller manually tracking which slugs belong together. There's no concept of "this set of files is one atomic artifact." + +Snapshot trees change that. `git cas store-tree ./dist --slug release/v4.0.0` stores an entire directory as a single CAS tree — one root manifest that references child manifests per file, one vault entry, one tree OID. Restore reconstitutes the full directory structure. This unlocks "store my build output" as a single atomic operation. + +**Why It Matters** + +- **CI/CD artifacts**: `npm run build && git cas store-tree ./dist --slug build/$CI_COMMIT_SHA --tree` — one command, one OID, committed in the release tag. +- **Dataset versioning**: Store a directory of CSV/Parquet files as a single versioned snapshot, restore any version atomically. +- **Config bundles**: Store an entire config directory (TLS certs, env files, deploy scripts) as one encrypted vault entry. +- **Binary releases**: Store a multi-file release (binary + README + license + checksums) as one restorable unit. 
+ +**Manifest Design** + +```json +{ + "version": 3, + "type": "tree", + "slug": "release/v4.0.0", + "entries": [ + { "path": "index.js", "manifestOid": "abc123...", "size": 45200 }, + { "path": "lib/utils.js", "manifestOid": "def456...", "size": 12800 }, + { "path": "assets/logo.png", "manifestOid": "789abc...", "size": 204800 } + ], + "totalSize": 262800, + "totalChunks": 12, + "encryption": { ... }, + "compression": { ... } +} +``` + +Each `entries[].manifestOid` points to a standard file-level manifest blob (v1/v2). The root tree manifest is the index; child manifests are the per-file metadata. Encryption and compression applied per-file, configured at the tree level. + +**Mini Battle Plan** + +| Phase | Work | ~LoC | ~Hours | +|-------|------|------|--------| +| 1. Schema | Add `TreeManifestSchema` with `type: 'tree'`, `entries[]` array. Backward compat: existing manifests have no `type` field (treated as `type: 'file'`). | ~40 | ~2h | +| 2. CasService | `storeTree({ source: string, slug, encryptionKey?, compression? })` — walks directory recursively, stores each file via existing `store()`, collects child manifests, builds root tree manifest. Parallel file stores via semaphore. | ~120 | ~6h | +| 3. CasService | `restoreTree({ manifest, outputDir, encryptionKey? })` — reads root tree manifest, restores each child file to `outputDir/entry.path`. Creates intermediate directories. | ~80 | ~4h | +| 4. Facade | Wire `storeDirectory()` and `restoreDirectory()` through `ContentAddressableStore`. | ~30 | ~1h | +| 5. CLI | `git cas store-tree --slug ` and `git cas restore --slug --out ` (auto-detect tree vs file manifest). | ~40 | ~2h | +| 6. Tests | Round-trip with nested dirs, empty dirs, symlinks (skip or follow?), encrypted+compressed trees, Merkle child manifests. | ~100 | ~4h | +| **Total** | | **~410** | **~19h** | + +**Open Questions** +- Symlinks: follow, skip, or store as metadata? +- Empty directories: include in manifest or skip? 
+- File permissions: record and restore, or ignore? +- Maximum depth limit to prevent unbounded recursion? + +--- + +## Vision 2: Portable Bundles — Air-Gap Transfer + +**The Pitch** + +`git cas bundle --slug my-asset --out asset.casb` creates a self-contained bundle file that includes the manifest, all chunk blobs, and enough metadata to import into any git-cas-enabled repo. `git cas import --bundle asset.casb` reconstitutes it. Like `git bundle` but for CAS assets. + +This enables offline transfer between air-gapped systems without needing `git push/pull` or shared remotes. Ship a USB stick, email an encrypted bundle, or distribute via any file transfer mechanism. + +**Bundle Format** + +``` +┌─────────────────────────────┐ +│ Magic: "CASB\x01" (5B) │ ← Version 1 bundle +│ Header length (4B) │ +│ Header (JSON): │ +│ { slug, filename, size, │ +│ chunkCount, encrypted, │ +│ compressed, codec } │ +│ Manifest blob (var) │ +│ Chunk 0 length (4B) │ +│ Chunk 0 data (var) │ +│ Chunk 1 length (4B) │ +│ Chunk 1 data (var) │ +│ ... │ +│ SHA-256 checksum (32B) │ ← Over everything above +└─────────────────────────────┘ +``` + +Simple, streamable, no external dependencies. The checksum at the end covers the entire bundle — tamper detection without needing encryption. + +**Mini Battle Plan** + +| Phase | Work | ~LoC | ~Hours | +|-------|------|------|--------| +| 1. Format spec | Define bundle wire format, version byte, header schema. Document in `docs/BUNDLE-FORMAT.md`. | ~0 prod, ~40 docs | ~1h | +| 2. Bundle writer | `CasService.createBundle({ manifest, output: WritableStream })` — streams manifest + chunks into bundle format. Calculates trailing checksum. | ~80 | ~4h | +| 3. Bundle reader | `CasService.importBundle({ input: ReadableStream })` — parses header, validates checksum, writes blobs to Git ODB, returns manifest. | ~100 | ~5h | +| 4. Facade + CLI | `git cas bundle --slug --out ` and `git cas import --bundle [--vault]`. | ~40 | ~2h | +| 5. 
Tests | Round-trip, corrupted bundle (bad checksum), encrypted bundles, Merkle manifests, partial read (truncated file). | ~80 | ~3h | +| **Total** | | **~340** | **~15h** | + +**Why Not Just `git bundle`?** + +`git bundle` exports entire ref histories. It requires the recipient to have a compatible Git repo structure. CAS bundles export a single asset with just the manifest and blobs — no ref history, no commit chain, no pack negotiation. They're smaller, simpler, and purpose-built for asset transfer. + +--- + +## Vision 3: Manifest Diff Engine + +**The Pitch** + +`git cas diff --from photos/v1 --to photos/v2` compares two manifests and shows which chunks changed, were added, or removed. With CDC (M10), this becomes extremely powerful — you can see exactly which byte ranges of a binary file changed between versions. + +**API Design** + +```js +const diff = await cas.diffManifests({ oldManifest, newManifest }); +// Returns: +// { +// unchanged: [{ index, digest, size }], +// added: [{ index, digest, size }], +// removed: [{ index, digest, size }], +// modified: [{ oldIndex, newIndex, oldDigest, newDigest }], +// summary: { +// unchangedBytes: 1048576, +// addedBytes: 262144, +// removedBytes: 0, +// reuseRatio: 0.8, // 80% of chunks reused +// } +// } +``` + +The diff is purely metadata-based — no blob reads required. Compare chunk digests between manifests. With fixed chunking, any insertion shifts all downstream chunks (low reuse). With CDC, insertions affect 1-2 chunks (high reuse). The `reuseRatio` metric quantifies dedup efficiency. + +**TUI Integration** + +``` +git cas diff --from photos/v1 --to photos/v2 --heatmap + + v1: ████████████████████████████████████████ + v2: ████████░░░░████████████████████████████ + ^^^^ + 2 chunks changed (bytes 524288–786432) + 38/40 chunks reused (95.0%) +``` + +**Mini Battle Plan** + +| Phase | Work | ~LoC | ~Hours | +|-------|------|------|--------| +| 1. 
Diff engine | `CasService.diffManifests({ oldManifest, newManifest })` — digest-set comparison, handles reordering. | ~60 | ~3h | +| 2. CLI command | `git cas diff --from --to ` with human-readable summary. | ~30 | ~1h | +| 3. Heatmap view | Side-by-side chunk heatmap showing unchanged (green), changed (red), added (yellow) blocks. Reuses bijou gradient components from Task 13.5. | ~40 | ~2h | +| 4. Tests | Identical manifests (0 diff), completely different, single-chunk change, CDC vs fixed dedup comparison. | ~50 | ~2h | +| **Total** | | **~180** | **~8h** | + +**Synergy with M10 Hydra**: Diff becomes dramatically more useful with CDC. Fixed chunking: insert 1 byte → 100% of downstream chunks changed. CDC: insert 1 byte → 1-2 chunks changed. The diff engine quantifies this, making CDC's value proposition concrete and measurable. + +--- + +## Vision 4: CompressionPort — zstd, brotli, lz4 + +**The Pitch** + +Currently, compression is hardcoded to gzip. The `CompressionPort` abstraction mirrors the existing `CryptoPort` and `CodecPort` patterns — a port with pluggable adapters. The manifest already records `compression.algorithm`, so backward compat is built in. + +**Why This Matters** + +| Algorithm | Ratio (typical) | Compress speed | Decompress speed | Best for | +|-----------|-----------------|----------------|------------------|----------| +| gzip | Good | Slow (~50 MB/s) | Moderate (~300 MB/s) | Current default | +| **zstd** | **Excellent** | **Fast (~500 MB/s)** | **Very fast (~1.5 GB/s)** | **General purpose, best all-rounder** | +| brotli | Excellent | Very slow (~10 MB/s) | Fast (~500 MB/s) | Pre-compressed web assets | +| lz4 | Moderate | Ultra-fast (~2 GB/s) | Ultra-fast (~4 GB/s) | Speed-critical, low-latency | + +Zstd alone would give 5-10x faster compression with equal or better ratio. For a tool that compresses before encrypting, compression speed directly impacts store throughput. 
+ +**Mini Battle Plan** + +| Phase | Work | ~LoC | ~Hours | +|-------|------|------|--------| +| 1. Port definition | `src/ports/CompressionPort.js` — `compress(source: AsyncIterable): AsyncIterable` and `decompress(buffer: Buffer): Promise`. Property: `algorithm: string`. | ~20 | ~1h | +| 2. GzipAdapter | Wrap existing `createGzip()` / `gunzipAsync()` logic into adapter. Remove inline gzip from CasService. | ~30 | ~1h | +| 3. ZstdAdapter | Use `@napi-rs/zstd` (native binding, 0-dep) or `fzstd` (pure JS fallback). Streaming compress via transform. | ~40 | ~2h | +| 4. CasService refactor | Replace inline compression with `this.compression.compress(source)` and `this.compression.decompress(buffer)`. Facade accepts `compression: { algorithm: 'gzip' | 'zstd' }` and selects adapter. | ~30 | ~2h | +| 5. Tests + benchmarks | Round-trip with each algorithm. Benchmark: 10 MB file, gzip vs zstd compress speed and ratio. | ~60 | ~2h | +| **Total** | | **~180** | **~8h** | + +**Backward Compatibility**: Old manifests with `compression.algorithm: 'gzip'` still work — the facade selects the gzip adapter. New manifests can specify `'zstd'`. Restoring always reads the algorithm from the manifest, so mixed-algorithm vaults work seamlessly. + +--- + +## Vision 5: Watch Mode — Continuous Sync + +**The Pitch** + +`git cas watch ./data --slug live-data --interval 5s` monitors a file or directory for changes and incrementally re-stores modified content. Combined with CDC (M10), only changed chunks get written. The vault entry updates atomically on each sync cycle. + +**Use Cases** + +- **Development hot-reload**: Watch a model weights file during training; each checkpoint auto-stored with a versioned slug (`live-data@1`, `live-data@2`, ...). +- **Config sync**: Watch a config directory; changes automatically vaulted. +- **Continuous backup**: Low-overhead continuous protection for critical files. 
+ +**Mini Battle Plan** + +| Phase | Work | ~LoC | ~Hours | +|-------|------|------|--------| +| 1. File watcher | Use `fs.watch()` (Node) / `Bun.file().watch()` with debounce (default 1s). Detect create/modify/delete. | ~50 | ~2h | +| 2. Incremental store | On change: re-store the file, diff manifests (Vision 3), skip if unchanged (identical digest). Update vault entry with `--force`. | ~60 | ~3h | +| 3. CLI command | `git cas watch --slug [--interval ] [--key-file ]`. Ctrl-C to stop. | ~30 | ~1h | +| 4. Progress | Live status line showing: last sync time, files watched, total syncs, bytes stored. | ~20 | ~1h | +| 5. Tests | Mock fs.watch, verify debounce, verify vault updates, verify no-op on unchanged files. | ~60 | ~3h | +| **Total** | | **~220** | **~10h** | + +**Synergy with CDC (M10)**: Without CDC, every modification re-stores every chunk downstream of the edit point. With CDC, only 1-2 chunks change per modification. Watch mode + CDC together give efficient continuous incremental storage. + +--- + +## Vision 6: Interactive Passphrase Prompt + +**The Pitch** + +Replace `--vault-passphrase "my secret"` (visible in shell history, `ps` output, and CI logs) with an interactive TTY prompt that reads the passphrase from stdin with echo disabled. Like `gpg`, `ssh-keygen`, and `sudo`. + +``` +$ git cas store ./secrets.tar.gz --slug prod-secrets --vault-passphrase +Enter vault passphrase: •••••••••• +Confirm passphrase: •••••••••• +``` + +Falls back to `GIT_CAS_PASSPHRASE` env var for non-interactive contexts (CI). The flag `--vault-passphrase` without a value triggers the prompt; with a value, uses it directly (backward compatible). + +**Mini Battle Plan** + +| Phase | Work | ~LoC | ~Hours | +|-------|------|------|--------| +| 1. TTY reader | `readPassphrase(prompt: string): Promise` — opens `/dev/tty` (Unix) or `CON` (Windows), sets raw mode, reads until Enter, echoes `•` per character. | ~40 | ~2h | +| 2. 
CLI integration | When `--vault-passphrase` is passed without a value and stdin is a TTY, call `readPassphrase()`. On store (first use), prompt twice for confirmation. | ~20 | ~1h | +| 3. Tests | Mock TTY input, verify echo suppression, verify confirmation match/mismatch, verify env var fallback. | ~30 | ~1h | +| **Total** | | **~90** | **~4h** | + +**This directly mitigates Concern 5 (shell history exposure) below.** + +--- + +# Concerns & Mitigations + +Architectural and security concerns identified during code review, with proposed mitigations and defensive tests for each. + +--- + +## Concern 1: Memory Amplification on Encrypted/Compressed Restore + +**The Problem** + +`_restoreBuffered()` concatenates ALL chunk blobs into a single buffer, decrypts, then decompresses. Despite `restoreStream()` exposing an `AsyncIterable` API (implying streaming), encrypted or compressed files buffer the entire plaintext in memory. A 10 GB encrypted file attempts a 10 GB allocation — and then potentially a second 10 GB buffer for decompression. + +The JSDoc note added in the M14 review documents this, but there's no runtime guard. A user calling `restoreStream()` expecting constant memory will OOM silently on large encrypted files. + +**Root Cause**: AES-256-GCM requires the entire ciphertext for authentication tag verification before any plaintext is released. You can't verify-then-stream with GCM — it's authenticate-everything-or-nothing. This is a fundamental limitation of the cipher mode, not a bug. + +**Mitigation Strategy** + +| # | Mitigation | Effort | Impact | +|---|-----------|--------|--------| +| M1 | **Memory guard**: Add `maxRestoreBufferSize` option (default 512 MB). `_restoreBuffered()` checks `manifest.size` against limit before allocating. Throws `CasError('RESTORE_TOO_LARGE')` with actionable message suggesting chunked storage without encryption, or increasing the limit. 
| ~20 LoC | Prevents surprise OOM | +| M2 | **Per-chunk encryption** (long-term): Encrypt each chunk independently with a derived per-chunk nonce (`baseNonce + chunkIndex`). Each chunk gets its own GCM tag. Restore can verify and decrypt per-chunk in O(chunkSize) memory. **Breaking change** — new manifest encryption format. | ~200 LoC | True streaming encrypted restore | +| M3 | **Documentation**: Add a "Memory Model" section to README explaining which code paths buffer and which stream. | ~0 LoC | Sets expectations | + +**Recommended**: M1 (immediate safety net) + M3 (documentation). M2 is a future milestone — it changes the encryption format and requires careful security analysis (per-chunk nonces must not collide, chunk reordering attacks need mitigation via a MAC over the chunk sequence). + +**Defensive Tests** + +``` +describe('Concern 1: Memory guard on encrypted restore', () => { + it('throws RESTORE_TOO_LARGE when manifest.size exceeds maxRestoreBufferSize', ...); + it('succeeds when manifest.size is within maxRestoreBufferSize', ...); + it('does not apply guard to unencrypted uncompressed restoreStream', ...); + it('includes actionable hint in RESTORE_TOO_LARGE error message', ...); +}); +``` + +**New Error Code**: `RESTORE_TOO_LARGE` — "File too large for buffered restore. The encrypted/compressed restore path buffers the entire file in memory. Set `maxRestoreBufferSize` to increase the limit, or store without encryption for streaming restore." + +--- + +## Concern 2: Orphaned Blob Accumulation After STREAM_ERROR + +**The Problem** + +When `_chunkAndStore()` throws `STREAM_ERROR`, chunks already written to Git via `persistence.writeBlob()` are orphaned — they exist in the Git ODB but no tree or ref references them. `git gc` will eventually reclaim them (default grace period: 2 weeks), but: + +1. No tracking of which blobs were orphaned — there's no cleanup manifest or error log. +2. 
In high-failure environments (unreliable sources, network streams), orphaned blobs accumulate silently. +3. `git count-objects` shows growing "loose objects" with no explanation. + +The `await Promise.allSettled(pending)` fix from C1 ensures in-flight writes complete (no floating promises), but their results are discarded — successful writes still create orphaned blobs. + +**Mitigation Strategy** + +| # | Mitigation | Effort | Impact | +|---|-----------|--------|--------| +| M1 | **Report orphaned blobs in error metadata**: After `Promise.allSettled(pending)`, collect the blob OIDs from fulfilled results and include them in the `STREAM_ERROR` meta: `{ chunksDispatched, orphanedBlobs: ['abc...', 'def...'], originalError }`. Callers can log or clean up. | ~15 LoC | Visibility | +| M2 | **Observability metric**: Emit `observability.metric('error', { action: 'orphaned_blobs', count: N, blobs: [...] })` so monitoring systems can track accumulation. | ~5 LoC | Monitoring | +| M3 | **CLI warning**: When `git cas store` fails with STREAM_ERROR, print: `"Warning: N chunk blobs were written before the error. They will be reclaimed by 'git gc'."` | ~5 LoC | User awareness | + +**Recommended**: M1 + M2 (cheap, high-value visibility). M3 for CLI polish. + +**Defensive Tests** + +``` +describe('Concern 2: Orphaned blob tracking on STREAM_ERROR', () => { + it('includes orphanedBlobs array in STREAM_ERROR meta', ...); + it('orphanedBlobs contains blob OIDs from successful writes before failure', ...); + it('orphanedBlobs is empty when stream fails before any writes', ...); + it('emits orphaned_blobs metric via observability', ...); +}); +``` + +--- + +## Concern 3: No Upper Bound on Chunk Size + +**The Problem** + +`CasService` enforces a minimum chunk size (`chunkSize < 1024` throws), but there's no maximum. A user can configure `chunkSize: 4 * 1024 * 1024 * 1024` (4 GB) — and `git hash-object -w` will attempt to read the entire 4 GB chunk into memory as a single buffer. 
The `_storeChunk()` method passes the chunk buffer to `persistence.writeBlob()`, which shells out to `git hash-object` via stdin — but the buffer itself is already in Node.js memory. + +Additionally, Git repositories have practical performance limits on individual blob sizes. While there's no hard cap, blobs >100 MB cause significant performance degradation in pack files, and >1 GB blobs can cause `git push` failures on many hosting platforms (GitHub's limit is 100 MB per blob via the API). + +**Mitigation Strategy** + +| # | Mitigation | Effort | Impact | +|---|-----------|--------|--------| +| M1 | **Enforce maximum chunk size**: Add `if (chunkSize > 100 * 1024 * 1024) throw new Error('Chunk size must not exceed 100 MiB')` in the constructor. 100 MiB is generous (default is 256 KiB) while staying within Git hosting limits. | ~3 LoC | Prevents footgun | +| M2 | **Warn above 10 MiB**: Emit `observability.log('warn', 'Large chunk size may impact Git performance', { chunkSize })` when chunkSize > 10 MiB. | ~3 LoC | Soft guidance | + +**Recommended**: M1 (hard cap) + M2 (soft warning). The maximum can be made configurable via an `allowLargeChunks: true` escape hatch for advanced users. + +**Defensive Tests** + +``` +describe('Concern 3: Chunk size upper bound', () => { + it('throws when chunkSize exceeds 100 MiB', ...); + it('accepts chunkSize of exactly 100 MiB', ...); + it('accepts default chunkSize (256 KiB)', ...); + it('accepts minimum chunkSize (1024 bytes)', ...); + it('logs warning when chunkSize exceeds 10 MiB', ...); +}); +``` + +--- + +## Concern 4: Web Crypto Adapter Silent Memory Buffering + +**The Problem** + +`WebCryptoAdapter.createEncryptionStream()` returns an `encrypt()` async generator that appears to stream, but internally accumulates all chunks into a single buffer before calling `crypto.subtle.encrypt()` (which is one-shot for GCM). The Deno runtime uses this adapter. 
A user on Deno calling `store()` with a 5 GB source will OOM without any indication that streaming is not actually happening. + +The NodeCryptoAdapter and BunCryptoAdapter use `node:crypto` Cipher streams which truly stream — so this is a Deno-specific behavioral difference with no warning. + +**Mitigation Strategy** + +| # | Mitigation | Effort | Impact | +|---|-----------|--------|--------| +| M1 | **Size tracking in encrypt generator**: Track accumulated bytes in `encrypt()`. When total exceeds a configurable limit (default 512 MB), throw `CasError('ENCRYPTION_BUFFER_EXCEEDED')` with message: `"Web Crypto API requires buffering the entire file for GCM encryption. File exceeds buffer limit. Use Node.js or Bun for large encrypted files, or store without encryption."` | ~15 LoC | Prevents silent OOM | +| M2 | **Runtime capability flag**: Add `CryptoPort.capabilities` property: `{ streamingEncryption: boolean }`. WebCryptoAdapter returns `false`. CasService can check this and warn or error when storing large encrypted files on non-streaming runtimes. | ~20 LoC | Architectural awareness | +| M3 | **Adapter-level documentation**: JSDoc on WebCryptoAdapter noting the buffering limitation. | ~5 LoC | Developer awareness | + +**Recommended**: M1 (safety net) + M3 (documentation). M2 is a clean long-term solution. + +**Defensive Tests** + +``` +describe('Concern 4: Web Crypto buffering guard', () => { + it('throws ENCRYPTION_BUFFER_EXCEEDED when accumulated bytes exceed limit', ...); + it('succeeds for files within buffer limit', ...); + it('NodeCryptoAdapter does NOT throw for large files (true streaming)', ...); + it('WebCryptoAdapter.capabilities.streamingEncryption is false', ...); +}); +``` + +**New Error Code**: `ENCRYPTION_BUFFER_EXCEEDED` — "File exceeds encryption buffer limit on this runtime. Web Crypto API (Deno) buffers the entire file for AES-GCM. Use Node.js or Bun for large encrypted files." 
+ +--- + +## Concern 5: Passphrase Exposure in Shell History and Process Listings + +**The Problem** + +The `--vault-passphrase ` CLI flag puts the passphrase in: +1. **Shell history**: `~/.bash_history`, `~/.zsh_history` — survives terminal close, searchable. +2. **Process listing**: `ps aux` shows full command line including the passphrase to all users on the system. +3. **CI logs**: If used in a CI pipeline without masking, the passphrase appears in build logs. + +The `GIT_CAS_PASSPHRASE` env var is better (not in shell history) but still visible in `/proc//environ` on Linux and in process listings on some systems. + +**Mitigation Strategy** + +| # | Mitigation | Effort | Impact | +|---|-----------|--------|--------| +| M1 | **Interactive prompt**: See Vision 6 above. `--vault-passphrase` without a value triggers TTY prompt with echo disabled. Confirmation on first use. | ~90 LoC | Eliminates history exposure | +| M2 | **File-based passphrase**: `--vault-passphrase-file ` reads the passphrase from a file (like `docker secret`, `kubectl --token-file`). File can be tmpfs-backed, permissions-restricted, or injected by a secrets manager. | ~15 LoC | CI-friendly, no process exposure | +| M3 | **Stdin passphrase**: `echo "secret" | git cas store --vault-passphrase -` reads from stdin. Useful in pipes. | ~10 LoC | Scriptable | +| M4 | **Documentation warning**: Add security note in README and `--help` output: "Avoid passing passphrases on the command line. Use `GIT_CAS_PASSPHRASE` env var, `--vault-passphrase-file`, or omit the value for interactive prompt." | ~0 LoC | Awareness | + +**Recommended**: M1 + M2 + M4. Interactive prompt for humans, file-based for CI, documentation for everyone. 
+ +**Defensive Tests** + +``` +describe('Concern 5: Passphrase input security', () => { + it('reads passphrase from file when --vault-passphrase-file is used', ...); + it('prompts interactively when --vault-passphrase is passed without value in TTY', ...); + it('falls back to GIT_CAS_PASSPHRASE env var in non-TTY', ...); + it('errors when no passphrase source is available in non-TTY mode', ...); + it('confirmation prompt rejects mismatched passphrases', ...); +}); +``` + +--- + +## Concern 6: No KDF Brute-Force Rate Limiting + +**The Problem** + +`deriveKey()` and the restore path's `_resolveKeyFromPassphrase()` have no rate limiting, attempt counting, or lockout mechanism. An attacker with access to the API or CLI can brute-force passphrases at full CPU speed: + +- PBKDF2 (100k iterations, SHA-512): ~100-500 attempts/sec on modern hardware. +- scrypt (N=16384, r=8, p=1): ~10-50 attempts/sec. + +For a strong passphrase (>80 bits of entropy), this is fine — but many users choose weak passphrases. There's no warning, no audit trail, and no way to detect an ongoing brute-force attack. + +**Mitigation Strategy** + +| # | Mitigation | Effort | Impact | +|---|-----------|--------|--------| +| M1 | **Observability metric on failed decryption**: Emit `observability.metric('error', { action: 'decryption_failed', slug, attempt })` on every `INTEGRITY_ERROR` during restore. Monitoring systems can alert on anomalous failure rates. | ~5 LoC | Detection | +| M2 | **CLI rate limit**: In the CLI layer (not the library), add a 1-second delay after each failed passphrase attempt. Prevents rapid brute-force via the terminal without affecting the programmatic API. | ~5 LoC | CLI hardening | +| M3 | **Stronger KDF defaults**: Increase PBKDF2 default iterations from 100k to 600k (OWASP 2023 recommends ~210k for PBKDF2-HMAC-SHA512; 600k matches the SHA-256 guidance and adds comfortable margin). Increase scrypt default cost from 16384 to 65536. Document the change as a security improvement.
**Note**: this affects store/restore performance — KDF runs once per operation, so the latency increase (100ms → 600ms for PBKDF2) is acceptable for interactive use but may impact batch workflows. | ~5 LoC | Resistance | +| M4 | **Documentation**: Add KDF parameter guidance to SECURITY.md — recommended iterations/cost for different threat models (personal use, team, high-security). | ~0 LoC | Guidance | + +**Recommended**: M1 (detection) + M2 (CLI hardening) + M4 (guidance). M3 is a judgment call — the performance tradeoff is worth discussing. + +**Defensive Tests** + +``` +describe('Concern 6: KDF brute-force awareness', () => { + it('emits decryption_failed metric on wrong passphrase', ...); + it('emits metric with slug context for audit trail', ...); + it('CLI applies delay after failed passphrase attempt', ...); + it('library API does NOT rate-limit (callers manage their own policy)', ...); +}); +``` + +--- + +## Concern 7: GCM Nonce Collision Risk at Scale + +**The Problem** + +AES-256-GCM uses a 96-bit (12-byte) nonce, generated randomly per `encryptBuffer()` / `createEncryptionStream()` call. The birthday bound for 96-bit nonces is ~2^48 — after ~281 trillion encryptions with the same key, nonce collision probability exceeds 50%. In practice, NIST recommends limiting to 2^32 (~4.3 billion) invocations per key for a negligible collision probability. + +For a single user storing files with one key, this is not a practical concern — you'd need to store 4 billion files. But: +1. There's no explicit tracking or warning of nonce count per key. +2. The nonce is pure random, not a counter — so there's no guarantee of uniqueness even at low counts (just overwhelming probability). +3. A nonce collision with GCM is catastrophic — it reveals the XOR of two plaintexts and allows auth tag forgery. 
+ +**Mitigation Strategy** + +| # | Mitigation | Effort | Impact | +|---|-----------|--------|--------| +| M1 | **Document the bound**: Add to SECURITY.md: "AES-256-GCM with random nonces is safe for up to 2^32 encryptions per key. For higher volumes, rotate keys (M12) or use a counter-based nonce scheme." | ~0 LoC | Awareness | +| M2 | **Nonce counter option** (long-term): Add optional `nonceStrategy: 'random' | 'counter'` to encryption options. Counter-based nonces guarantee uniqueness but require persistent state (a counter stored in the vault metadata). Random remains the default for simplicity. | ~60 LoC | Eliminates collision risk | +| M3 | **Key usage counter in vault**: Track `encryptionCount` in vault metadata. When it exceeds 2^31, emit a warning via observability: "Key has been used for N encryptions. Consider rotating." | ~20 LoC | Proactive warning | + +**Recommended**: M1 (immediate, zero-cost) + M3 (proactive warning). M2 is a significant design change that adds state management complexity — only needed for extremely high-volume use cases. 
+ +**Defensive Tests** + +``` +describe('Concern 7: Nonce uniqueness', () => { + it('generates unique nonces across 1000 consecutive encryptions', ...); + it('nonce is exactly 12 bytes (96 bits)', ...); + it('different encryptions of same plaintext with same key produce different ciphertexts', ...); + it('vault tracks encryptionCount and increments per store', ...); + it('warns via observability when encryptionCount exceeds threshold', ...); +}); +``` + +--- + +## Summary Table + +| # | Type | Severity | Fix Cost | Recommended Action | +|---|------|----------|----------|-------------------| +| C1 | Memory amplification | High | ~20 LoC | Add `maxRestoreBufferSize` guard | +| C2 | Orphaned blobs | Medium | ~20 LoC | Report orphaned blob OIDs in error meta | +| C3 | No chunk size cap | Medium | ~6 LoC | Enforce 100 MiB maximum | +| C4 | Web Crypto buffering | Medium | ~15 LoC | Add buffer size guard in WebCryptoAdapter | +| C5 | Passphrase exposure | High | ~90 LoC | Interactive prompt + file-based input | +| C6 | KDF no rate limit | Low | ~10 LoC | Observability metric + CLI delay | +| C7 | GCM nonce collision | Low | ~20 LoC | Document bound + vault usage counter | + +| # | Type | Theme | Est. Cost | +|---|------|-------|-----------| +| V1 | Feature | Snapshot trees (directory store) | ~410 LoC, ~19h | +| V2 | Feature | Portable bundles (air-gap transfer) | ~340 LoC, ~15h | +| V3 | Feature | Manifest diff engine | ~180 LoC, ~8h | +| V4 | Feature | CompressionPort + zstd/brotli/lz4 | ~180 LoC, ~8h | +| V5 | Feature | Watch mode (continuous sync) | ~220 LoC, ~10h | +| V6 | Feature | Interactive passphrase prompt | ~90 LoC, ~4h | diff --git a/bin/ui/progress.js b/bin/ui/progress.js index 422c6339..5bbd77eb 100644 --- a/bin/ui/progress.js +++ b/bin/ui/progress.js @@ -30,7 +30,7 @@ function formatBytes(bytes) { * @param {string} options.filePath - Path to the file being stored. * @param {number} options.chunkSize - Chunk size in bytes. 
* @param {boolean} [options.quiet] - Suppress all progress output. - * @returns {{ attach(service: EventEmitter): void, detach(): void }} + * @returns {{ attach(observer: { on(event: string, fn: Function): void, removeListener(event: string, fn: Function): void }): void, detach(): void }} */ export function createStoreProgress({ filePath, chunkSize, quiet, fileSize: providedSize }) { if (quiet) { @@ -58,7 +58,7 @@ export function createStoreProgress({ filePath, chunkSize, quiet, fileSize: prov * @param {Object} options * @param {number} options.totalChunks - Number of chunks to restore. * @param {boolean} [options.quiet] - Suppress all progress output. - * @returns {{ attach(service: EventEmitter): void, detach(): void }} + * @returns {{ attach(observer: { on(event: string, fn: Function): void, removeListener(event: string, fn: Function): void }): void, detach(): void }} */ export function createRestoreProgress({ totalChunks, quiet }) { if (quiet || totalChunks === 0) { diff --git a/examples/progress-tracking.js b/examples/progress-tracking.js index c45d6fe4..37e525ec 100755 --- a/examples/progress-tracking.js +++ b/examples/progress-tracking.js @@ -25,10 +25,8 @@ const repoDir = mkdtempSync(path.join(os.tmpdir(), 'cas-progress-')); console.log(`Created temporary repository: ${repoDir}`); execSync('git init --bare', { cwd: repoDir, stdio: 'ignore' }); -// Initialize plumbing and CAS with an EventEmitterObserver -const plumbing = GitPlumbing.createDefault({ cwd: repoDir }); +// Initialize CAS with an EventEmitterObserver const observer = new EventEmitterObserver(); -const cas = ContentAddressableStore.createJson({ plumbing, chunkSize: 128 * 1024 }); // 128 KB chunks // Create a larger test file to see multiple chunks const testDir = mkdtempSync(path.join(os.tmpdir(), 'cas-test-')); @@ -101,8 +99,7 @@ console.log(' - integrity:fail'); console.log(' - error'); // Step 1: Store the file with progress tracking -// NOTE: We pass the observer to createJson options for the 
CAS to use it -const cas2 = new ContentAddressableStore({ +const cas = new ContentAddressableStore({ plumbing: GitPlumbing.createDefault({ cwd: repoDir }), chunkSize: 128 * 1024, observability: observer, @@ -110,7 +107,7 @@ const cas2 = new ContentAddressableStore({ console.log('\n--- Step 1: Storing file (watch for chunk events) ---\n'); const startStore = Date.now(); -const manifest = await cas2.storeFile({ +const manifest = await cas.storeFile({ filePath: testFilePath, slug: 'progress-demo', filename: 'large-file.bin' @@ -133,7 +130,7 @@ console.log(`Throughput: ${storeThroughputMBps.toFixed(2)} MB/s`); // Step 2: Restore the file with progress tracking console.log('\n--- Step 2: Restoring file (watch for chunk events) ---\n'); const startRestore = Date.now(); -const { buffer, bytesWritten } = await cas2.restore({ manifest }); +const { buffer, bytesWritten } = await cas.restore({ manifest }); const restoreTime = Date.now() - startRestore; console.log(`\nRestore completed in ${restoreTime}ms`); @@ -157,7 +154,7 @@ if (!contentMatches) { // Step 3: Run integrity verification with events console.log('\n--- Step 3: Integrity verification (watch for events) ---\n'); const startVerify = Date.now(); -const isValid = await cas2.verifyIntegrity(manifest); +const isValid = await cas.verifyIntegrity(manifest); const verifyTime = Date.now() - startVerify; console.log(`\nIntegrity verification completed in ${verifyTime}ms`); @@ -194,7 +191,7 @@ observer.on('chunk:stored', progressListener); const testFilePath2 = path.join(testDir, 'progress-demo.bin'); writeFileSync(testFilePath2, randomBytes(fileSize)); -const manifest2 = await cas2.storeFile({ +const manifest2 = await cas.storeFile({ filePath: testFilePath2, slug: 'progress-demo-2', filename: 'progress-demo.bin' diff --git a/index.js b/index.js index 71f3b8dd..a5a83f2a 100644 --- a/index.js +++ b/index.js @@ -3,11 +3,10 @@ * @fileoverview Content Addressable Store - Managed blob storage in Git. 
*/ -import { createReadStream } from 'node:fs'; +import { createReadStream, createWriteStream } from 'node:fs'; import path from 'node:path'; import { Readable, Transform } from 'node:stream'; import { pipeline } from 'node:stream/promises'; -import { createWriteStream } from 'node:fs'; import CasService from './src/domain/services/CasService.js'; import VaultService from './src/domain/services/VaultService.js'; import GitPersistenceAdapter from './src/infrastructure/adapters/GitPersistenceAdapter.js'; diff --git a/src/domain/services/CasService.js b/src/domain/services/CasService.js index b20e81cd..87af7b32 100644 --- a/src/domain/services/CasService.js +++ b/src/domain/services/CasService.js @@ -109,8 +109,9 @@ export default class CasService { const casErr = new CasError( `Stream error during store: ${err.message}`, 'STREAM_ERROR', - { chunksWritten: nextIndex, originalError: err }, + { chunksDispatched: nextIndex, originalError: err }, ); + await Promise.allSettled(pending); this.observability.metric('error', { code: casErr.code, message: casErr.message }); throw casErr; } @@ -468,6 +469,11 @@ export default class CasService { * @param {import('../value-objects/Manifest.js').default} options.manifest - The file manifest. * @param {Buffer} [options.encryptionKey] - 32-byte key, required if manifest is encrypted. * @param {string} [options.passphrase] - Passphrase for KDF-based decryption. + * Note: For unencrypted files, each yielded buffer corresponds to an original + * stored chunk. For encrypted/compressed files, yielded buffers are + * chunkSize-sliced pieces of the decrypted/decompressed result and may not + * correspond 1:1 to the original chunks. + * * @yields {Buffer} * @throws {CasError} MISSING_KEY if manifest is encrypted but no key is provided. * @throws {CasError} INTEGRITY_ERROR if chunk verification or decryption fails. 
@@ -516,7 +522,7 @@ export default class CasService { */ async *_restoreStreaming(manifest) { const chunks = manifest.chunks; - const N = this.concurrency; + const readAhead = this.concurrency; let totalSize = 0; const readAndVerify = async (chunk) => { @@ -535,17 +541,17 @@ export default class CasService { }; const ahead = []; - for (let i = 0; i < Math.min(N, chunks.length); i++) { + for (let i = 0; i < Math.min(readAhead, chunks.length); i++) { ahead.push(readAndVerify(chunks[i])); } for (let i = 0; i < chunks.length; i++) { - const blob = await ahead[i % N]; + const blob = await ahead[i % readAhead]; this.observability.metric('chunk', { action: 'restored', index: chunks[i].index, size: blob.length, digest: chunks[i].digest }); totalSize += blob.length; - const nextIdx = i + N; + const nextIdx = i + readAhead; if (nextIdx < chunks.length) { - ahead[i % N] = readAndVerify(chunks[nextIdx]); + ahead[i % readAhead] = readAndVerify(chunks[nextIdx]); } yield blob; } diff --git a/src/infrastructure/adapters/EventEmitterObserver.js b/src/infrastructure/adapters/EventEmitterObserver.js index 82467a67..2c227bba 100644 --- a/src/infrastructure/adapters/EventEmitterObserver.js +++ b/src/infrastructure/adapters/EventEmitterObserver.js @@ -20,13 +20,13 @@ export default class EventEmitterObserver { * @param {Object} data - Must include `action` to form the event name. 
*/ metric(channel, data) { - const eventName = `${channel}:${data.action}`; if (channel === 'error') { if (this.#emitter.listenerCount('error') > 0) { this.#emitter.emit('error', data); } return; } + const eventName = `${channel}:${data.action}`; const payload = Object.fromEntries(Object.entries(data).filter(([k]) => k !== 'action')); this.#emitter.emit(eventName, payload); } diff --git a/test/unit/domain/services/CasService.parallel.test.js b/test/unit/domain/services/CasService.parallel.test.js index 57a24a2c..307a42fe 100644 --- a/test/unit/domain/services/CasService.parallel.test.js +++ b/test/unit/domain/services/CasService.parallel.test.js @@ -3,6 +3,7 @@ import { randomBytes } from 'node:crypto'; import CasService from '../../../../src/domain/services/CasService.js'; import NodeCryptoAdapter from '../../../../src/infrastructure/adapters/NodeCryptoAdapter.js'; import JsonCodec from '../../../../src/infrastructure/codecs/JsonCodec.js'; +import CasError from '../../../../src/domain/errors/CasError.js'; import SilentObserver from '../../../../src/infrastructure/adapters/SilentObserver.js'; function setup(concurrency = 1) { @@ -105,6 +106,41 @@ describe('Parallel I/O – encrypted + compressed', () => { }); }); +describe('Parallel I/O – stream error', () => { + function failingSource(chunksBeforeError, chunkSize = 1024) { + let yielded = 0; + return { + [Symbol.asyncIterator]() { + return { + async next() { + if (yielded >= chunksBeforeError) { + throw new Error('simulated stream failure'); + } + yielded++; + return { value: Buffer.alloc(chunkSize, 0xaa), done: false }; + }, + }; + }, + }; + } + + it('concurrency: 4 — STREAM_ERROR with correct chunksDispatched', async () => { + const { service } = setup(4); + try { + await service.store({ + source: failingSource(3), + slug: 'parallel-fail', + filename: 'fail.bin', + }); + expect.unreachable('should have thrown'); + } catch (err) { + expect(err).toBeInstanceOf(CasError); + expect(err.code).toBe('STREAM_ERROR'); + 
expect(err.meta.chunksDispatched).toBe(3); + } + }); +}); + describe('Parallel I/O – validation', () => { it('invalid concurrency: 0 throws', () => { expect(() => setup(0)).toThrow('Concurrency must be a positive integer'); diff --git a/test/unit/domain/services/CasService.stream-error.test.js b/test/unit/domain/services/CasService.stream-error.test.js index 84608d90..3db23808 100644 --- a/test/unit/domain/services/CasService.stream-error.test.js +++ b/test/unit/domain/services/CasService.stream-error.test.js @@ -72,7 +72,7 @@ describe('CasService stream error – STREAM_ERROR after 3 chunks', () => { }); } catch (err) { expect(err.code).toBe('STREAM_ERROR'); - expect(err.meta.chunksWritten).toBe(3); + expect(err.meta.chunksDispatched).toBe(3); expect(err.message).toContain('simulated stream failure'); } }); @@ -88,7 +88,7 @@ describe('CasService stream error – STREAM_ERROR immediate failure', () => { ({ service } = setup()); }); - it('throws STREAM_ERROR with chunksWritten=0 when stream fails immediately', async () => { + it('throws STREAM_ERROR with chunksDispatched=0 when stream fails immediately', async () => { await expect( service.store({ source: failingSource(0), @@ -105,7 +105,7 @@ describe('CasService stream error – STREAM_ERROR immediate failure', () => { }); } catch (err) { expect(err.code).toBe('STREAM_ERROR'); - expect(err.meta.chunksWritten).toBe(0); + expect(err.meta.chunksDispatched).toBe(0); } }); }); @@ -210,7 +210,7 @@ describe('CasService stream error – fuzz', () => { for (let i = 0; i < 20; i++) { const failAfter = i; - it(`STREAM_ERROR with chunksWritten=${failAfter} (iteration ${i})`, async () => { + it(`STREAM_ERROR with chunksDispatched=${failAfter} (iteration ${i})`, async () => { await expect( service.store({ source: failingSource(failAfter), @@ -227,7 +227,7 @@ describe('CasService stream error – fuzz', () => { }); } catch (err) { expect(err.code).toBe('STREAM_ERROR'); - expect(err.meta.chunksWritten).toBe(failAfter); + 
expect(err.meta.chunksDispatched).toBe(failAfter); } }); } From 54ebfc08f9d911c861af3938f21105f4b7830e62 Mon Sep 17 00:00:00 2001 From: James Ross Date: Fri, 27 Feb 2026 15:05:56 -0800 Subject: [PATCH 3/3] fix: address CodeRabbit PR feedback (8 issues) Major fixes: - Semaphore: guard against permit underflow in release() - StatsCollector: harden chunk metric against malformed payloads - CasService: validate observability port at construction time - CasService: drain in-flight writes before rethrowing CasError - CasService: drain prefetched reads on restore failure (finally block) Minor fixes: - CasService: emit file:restored metric for empty manifests - ROADMAP: add language tags to fenced code blocks (MD040) - ROADMAP: escape pipe characters in table cells (MD056) --- ROADMAP.md | 24 +++++------ src/domain/services/CasService.js | 41 +++++++++++++++---- src/domain/services/Semaphore.js | 3 ++ src/infrastructure/adapters/StatsCollector.js | 3 +- .../domain/services/CasService.events.test.js | 21 ++++++++++ test/unit/domain/services/CasService.test.js | 24 +++++++++++ test/unit/domain/services/Semaphore.test.js | 5 +++ .../adapters/StatsCollector.test.js | 17 +++++++- 8 files changed, 116 insertions(+), 22 deletions(-) diff --git a/ROADMAP.md b/ROADMAP.md index 34066179..c1e7991e 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -2403,7 +2403,7 @@ This enables offline transfer between air-gapped systems without needing `git pu **Bundle Format** -``` +```text ┌─────────────────────────────┐ │ Magic: "CASB\x01" (5B) │ ← Version 1 bundle │ Header length (4B) │ @@ -2517,7 +2517,7 @@ Zstd alone would give 5-10x faster compression with equal or better ratio. For a | 1. Port definition | `src/ports/CompressionPort.js` — `compress(source: AsyncIterable): AsyncIterable` and `decompress(buffer: Buffer): Promise`. Property: `algorithm: string`. | ~20 | ~1h | | 2. GzipAdapter | Wrap existing `createGzip()` / `gunzipAsync()` logic into adapter. Remove inline gzip from CasService. 
| ~30 | ~1h | | 3. ZstdAdapter | Use `@napi-rs/zstd` (native binding, 0-dep) or `fzstd` (pure JS fallback). Streaming compress via transform. | ~40 | ~2h | -| 4. CasService refactor | Replace inline compression with `this.compression.compress(source)` and `this.compression.decompress(buffer)`. Facade accepts `compression: { algorithm: 'gzip' | 'zstd' }` and selects adapter. | ~30 | ~2h | +| 4. CasService refactor | Replace inline compression with `this.compression.compress(source)` and `this.compression.decompress(buffer)`. Facade accepts `compression: { algorithm: 'gzip' \| 'zstd' }` and selects adapter. | ~30 | ~2h | | 5. Tests + benchmarks | Round-trip with each algorithm. Benchmark: 10 MB file, gzip vs zstd compress speed and ratio. | ~60 | ~2h | | **Total** | | **~180** | **~8h** | @@ -2558,7 +2558,7 @@ Zstd alone would give 5-10x faster compression with equal or better ratio. For a Replace `--vault-passphrase "my secret"` (visible in shell history, `ps` output, and CI logs) with an interactive TTY prompt that reads the passphrase from stdin with echo disabled. Like `gpg`, `ssh-keygen`, and `sudo`. 
-``` +```shell $ git cas store ./secrets.tar.gz --slug prod-secrets --vault-passphrase Enter vault passphrase: •••••••••• Confirm passphrase: •••••••••• @@ -2607,7 +2607,7 @@ The JSDoc note added in the M14 review documents this, but there's no runtime gu **Defensive Tests** -``` +```js describe('Concern 1: Memory guard on encrypted restore', () => { it('throws RESTORE_TOO_LARGE when manifest.size exceeds maxRestoreBufferSize', ...); it('succeeds when manifest.size is within maxRestoreBufferSize', ...); @@ -2644,7 +2644,7 @@ The `await Promise.allSettled(pending)` fix from C1 ensures in-flight writes com **Defensive Tests** -``` +```js describe('Concern 2: Orphaned blob tracking on STREAM_ERROR', () => { it('includes orphanedBlobs array in STREAM_ERROR meta', ...); it('orphanedBlobs contains blob OIDs from successful writes before failure', ...); @@ -2674,7 +2674,7 @@ Additionally, Git repositories have practical performance limits on individual b **Defensive Tests** -``` +```js describe('Concern 3: Chunk size upper bound', () => { it('throws when chunkSize exceeds 100 MiB', ...); it('accepts chunkSize of exactly 100 MiB', ...); @@ -2706,7 +2706,7 @@ The NodeCryptoAdapter and BunCryptoAdapter use `node:crypto` Cipher streams whic **Defensive Tests** -``` +```js describe('Concern 4: Web Crypto buffering guard', () => { it('throws ENCRYPTION_BUFFER_EXCEEDED when accumulated bytes exceed limit', ...); it('succeeds for files within buffer limit', ...); @@ -2736,14 +2736,14 @@ The `GIT_CAS_PASSPHRASE` env var is better (not in shell history) but still visi |---|-----------|--------|--------| | M1 | **Interactive prompt**: See Vision 6 above. `--vault-passphrase` without a value triggers TTY prompt with echo disabled. Confirmation on first use. | ~90 LoC | Eliminates history exposure | | M2 | **File-based passphrase**: `--vault-passphrase-file ` reads the passphrase from a file (like `docker secret`, `kubectl --token-file`). 
File can be tmpfs-backed, permissions-restricted, or injected by a secrets manager. | ~15 LoC | CI-friendly, no process exposure | -| M3 | **Stdin passphrase**: `echo "secret" | git cas store --vault-passphrase -` reads from stdin. Useful in pipes. | ~10 LoC | Scriptable | +| M3 | **Stdin passphrase**: `echo "secret" \| git cas store --vault-passphrase -` reads from stdin. Useful in pipes. | ~10 LoC | Scriptable | | M4 | **Documentation warning**: Add security note in README and `--help` output: "Avoid passing passphrases on the command line. Use `GIT_CAS_PASSPHRASE` env var, `--vault-passphrase-file`, or omit the value for interactive prompt." | ~0 LoC | Awareness | **Recommended**: M1 + M2 + M4. Interactive prompt for humans, file-based for CI, documentation for everyone. **Defensive Tests** -``` +```js describe('Concern 5: Passphrase input security', () => { it('reads passphrase from file when --vault-passphrase-file is used', ...); it('prompts interactively when --vault-passphrase is passed without value in TTY', ...); @@ -2779,7 +2779,7 @@ For a strong passphrase (>80 bits of entropy), this is fine — but many users c **Defensive Tests** -``` +```js describe('Concern 6: KDF brute-force awareness', () => { it('emits decryption_failed metric on wrong passphrase', ...); it('emits metric with slug context for audit trail', ...); @@ -2806,14 +2806,14 @@ For a single user storing files with one key, this is not a practical concern | # | Mitigation | Effort | Impact | |---|-----------|--------|--------| | M1 | **Document the bound**: Add to SECURITY.md: "AES-256-GCM with random nonces is safe for up to 2^32 encryptions per key. For higher volumes, rotate keys (M12) or use a counter-based nonce scheme." | ~0 LoC | Awareness | -| M2 | **Nonce counter option** (long-term): Add optional `nonceStrategy: 'random' | 'counter'` to encryption options. Counter-based nonces guarantee uniqueness but require persistent state (a counter stored in the vault metadata). 
Random remains the default for simplicity. | ~60 LoC | Eliminates collision risk | +| M2 | **Nonce counter option** (long-term): Add optional `nonceStrategy: 'random' \| 'counter'` to encryption options. Counter-based nonces guarantee uniqueness but require persistent state (a counter stored in the vault metadata). Random remains the default for simplicity. | ~60 LoC | Eliminates collision risk | | M3 | **Key usage counter in vault**: Track `encryptionCount` in vault metadata. When it exceeds 2^31, emit a warning via observability: "Key has been used for N encryptions. Consider rotating." | ~20 LoC | Proactive warning | **Recommended**: M1 (immediate, zero-cost) + M3 (proactive warning). M2 is a significant design change that adds state management complexity — only needed for extremely high-volume use cases. **Defensive Tests** -``` +```js describe('Concern 7: Nonce uniqueness', () => { it('generates unique nonces across 1000 consecutive encryptions', ...); it('nonce is exactly 12 bytes (96 bits)', ...); diff --git a/src/domain/services/CasService.js b/src/domain/services/CasService.js index 87af7b32..6cc284f0 100644 --- a/src/domain/services/CasService.js +++ b/src/domain/services/CasService.js @@ -30,6 +30,7 @@ export default class CasService { * @param {number} [options.concurrency=1] - Maximum parallel chunk I/O operations. */ constructor({ persistence, codec, crypto, observability, chunkSize = 256 * 1024, merkleThreshold = 1000, concurrency = 1 }) { + CasService._validateObservability(observability); if (chunkSize < 1024) { throw new Error('Chunk size must be at least 1024 bytes'); } @@ -48,6 +49,22 @@ export default class CasService { this.concurrency = concurrency; } + /** + * Validates that observability implements ObservabilityPort. 
+ * @private + * @param {*} observability + */ + static _validateObservability(observability) { + if ( + !observability || + typeof observability.metric !== 'function' || + typeof observability.log !== 'function' || + typeof observability.span !== 'function' + ) { + throw new Error('observability must implement ObservabilityPort'); + } + } + /** * Generates a SHA-256 hex digest for a buffer. * @private @@ -105,6 +122,7 @@ export default class CasService { } } } catch (err) { + await Promise.allSettled(pending); if (err instanceof CasError) { throw err; } const casErr = new CasError( `Stream error during store: ${err.message}`, @@ -482,6 +500,9 @@ export default class CasService { const key = await this._resolveEncryptionKey(manifest, encryptionKey, passphrase); if (manifest.chunks.length === 0) { + this.observability.metric('file', { + action: 'restored', slug: manifest.slug, size: 0, chunkCount: 0, + }); return; } @@ -545,15 +566,19 @@ export default class CasService { ahead.push(readAndVerify(chunks[i])); } - for (let i = 0; i < chunks.length; i++) { - const blob = await ahead[i % readAhead]; - this.observability.metric('chunk', { action: 'restored', index: chunks[i].index, size: blob.length, digest: chunks[i].digest }); - totalSize += blob.length; - const nextIdx = i + readAhead; - if (nextIdx < chunks.length) { - ahead[i % readAhead] = readAndVerify(chunks[nextIdx]); + try { + for (let i = 0; i < chunks.length; i++) { + const blob = await ahead[i % readAhead]; + this.observability.metric('chunk', { action: 'restored', index: chunks[i].index, size: blob.length, digest: chunks[i].digest }); + totalSize += blob.length; + const nextIdx = i + readAhead; + if (nextIdx < chunks.length) { + ahead[i % readAhead] = readAndVerify(chunks[nextIdx]); + } + yield blob; } - yield blob; + } finally { + await Promise.allSettled(ahead); } this.observability.metric('file', { diff --git a/src/domain/services/Semaphore.js b/src/domain/services/Semaphore.js index 274c50a1..266bda20 
100644 --- a/src/domain/services/Semaphore.js +++ b/src/domain/services/Semaphore.js @@ -38,6 +38,9 @@ export default class Semaphore { const next = this.#queue.shift(); next(); } else { + if (this.#active === 0) { + throw new Error('Semaphore release called without an active permit'); + } this.#active--; } } diff --git a/src/infrastructure/adapters/StatsCollector.js b/src/infrastructure/adapters/StatsCollector.js index 6d87f9c9..a19a58a8 100644 --- a/src/infrastructure/adapters/StatsCollector.js +++ b/src/infrastructure/adapters/StatsCollector.js @@ -13,7 +13,8 @@ export default class StatsCollector { } if (channel === 'chunk') { this.#chunksProcessed++; - this.#bytesTotal += data.size || 0; + const size = Number.isFinite(data?.size) ? data.size : 0; + this.#bytesTotal += size; } if (channel === 'error') { this.#errors++; diff --git a/test/unit/domain/services/CasService.events.test.js b/test/unit/domain/services/CasService.events.test.js index 7de6e54e..2c15abc8 100644 --- a/test/unit/domain/services/CasService.events.test.js +++ b/test/unit/domain/services/CasService.events.test.js @@ -245,3 +245,24 @@ describe('CasService events – event count verification', () => { expect(listener).toHaveBeenCalledTimes(1); }); }); + +describe('CasService events – empty manifest file:restored', () => { + it('emits file:restored with size 0 for empty manifest', async () => { + const { service, observer } = setup(); + + async function* emptySource() {} + const manifest = await service.store({ + source: emptySource(), slug: 'empty', filename: 'empty.bin', + }); + expect(manifest.chunks).toHaveLength(0); + + const onFileRestored = vi.fn(); + observer.on('file:restored', onFileRestored); + await service.restore({ manifest }); + + expect(onFileRestored).toHaveBeenCalledTimes(1); + expect(onFileRestored).toHaveBeenCalledWith(expect.objectContaining({ + slug: 'empty', size: 0, chunkCount: 0, + })); + }); +}); diff --git a/test/unit/domain/services/CasService.test.js 
b/test/unit/domain/services/CasService.test.js index 508d3d84..7f84b87a 100644 --- a/test/unit/domain/services/CasService.test.js +++ b/test/unit/domain/services/CasService.test.js @@ -28,6 +28,30 @@ function setup() { return { mockPersistence, service }; } +// --------------------------------------------------------------------------- +// observability validation +// --------------------------------------------------------------------------- +describe('CasService – observability validation', () => { + it('throws when observability is missing', () => { + expect(() => new CasService({ + persistence: {}, + crypto: new NodeCryptoAdapter(), + codec: new JsonCodec(), + chunkSize: 1024, + })).toThrow('observability must implement ObservabilityPort'); + }); + + it('throws when observability is missing metric()', () => { + expect(() => new CasService({ + persistence: {}, + crypto: new NodeCryptoAdapter(), + codec: new JsonCodec(), + chunkSize: 1024, + observability: { log() {}, span() { return { end() {} }; } }, + })).toThrow('observability must implement ObservabilityPort'); + }); +}); + // --------------------------------------------------------------------------- // store // --------------------------------------------------------------------------- diff --git a/test/unit/domain/services/Semaphore.test.js b/test/unit/domain/services/Semaphore.test.js index 72194d27..87ab5164 100644 --- a/test/unit/domain/services/Semaphore.test.js +++ b/test/unit/domain/services/Semaphore.test.js @@ -55,4 +55,9 @@ describe('Semaphore – validation', () => { it('throws on concurrency: 1.5', () => { expect(() => new Semaphore(1.5)).toThrow(); }); + + it('throws when release is called without an active permit', () => { + const sem = new Semaphore(1); + expect(() => sem.release()).toThrow('Semaphore release called without an active permit'); + }); }); diff --git a/test/unit/infrastructure/adapters/StatsCollector.test.js b/test/unit/infrastructure/adapters/StatsCollector.test.js index 
494914d8..c6751330 100644 --- a/test/unit/infrastructure/adapters/StatsCollector.test.js +++ b/test/unit/infrastructure/adapters/StatsCollector.test.js @@ -1,7 +1,7 @@ import { describe, it, expect } from 'vitest'; import StatsCollector from '../../../../src/infrastructure/adapters/StatsCollector.js'; -describe('StatsCollector', () => { +describe('StatsCollector – accumulation', () => { it('accumulates chunk metrics', () => { const stats = new StatsCollector(); stats.metric('chunk', { action: 'stored', size: 1024 }); @@ -41,6 +41,21 @@ describe('StatsCollector', () => { expect(s.elapsed).toBeGreaterThan(0); expect(s.throughput).toBeGreaterThan(0); }); +}); + +describe('StatsCollector – robustness', () => { + it('handles malformed chunk payloads gracefully', () => { + const stats = new StatsCollector(); + stats.metric('chunk', { action: 'stored' }); + stats.metric('chunk', { action: 'stored', size: undefined }); + stats.metric('chunk', { action: 'stored', size: NaN }); + stats.metric('chunk', { action: 'stored', size: 'not-a-number' }); + stats.metric('chunk', { action: 'stored', size: Infinity }); + + const s = stats.summary(); + expect(s.chunksProcessed).toBe(5); + expect(s.bytesTotal).toBe(0); + }); it('log() and span() do not throw', () => { const stats = new StatsCollector();