From f6deb944c079afe59213b55dc5635968fa846b8a Mon Sep 17 00:00:00 2001 From: Zhao Chen Date: Tue, 7 Apr 2026 22:00:13 +0800 Subject: [PATCH 01/15] docs: add integration tests design spec Define 8 test dimensions (~41 scenarios) covering functional correctness, network errors, resource leaks, concurrency safety, stress tests, data integrity, graceful shutdown, and idempotency. Includes shared mock OCI registry infrastructure design. Signed-off-by: Zhao Chen --- .../2026-04-07-integration-tests-design.md | 286 ++++++++++++++++++ 1 file changed, 286 insertions(+) create mode 100644 docs/superpowers/specs/2026-04-07-integration-tests-design.md diff --git a/docs/superpowers/specs/2026-04-07-integration-tests-design.md b/docs/superpowers/specs/2026-04-07-integration-tests-design.md new file mode 100644 index 00000000..0da75a77 --- /dev/null +++ b/docs/superpowers/specs/2026-04-07-integration-tests-design.md @@ -0,0 +1,286 @@ +# Integration Tests Design Spec + +## Goal + +Add comprehensive integration tests across modctl to cover functional correctness, stress scenarios, and error handling. Tests only — no functional code changes. + +## Scope + +8 test dimensions, ~41 test scenarios, covering `pkg/modelfile`, `pkg/backend`, `internal/pb`, and `cmd/` layers. + +**Out of scope:** Security tests (path traversal, TLS, credential handling) — deferred to a future effort. + +## Test Infrastructure + +### Mock OCI Registry (`test/helpers/mockregistry.go`) + +A reusable `httptest`-based mock OCI registry server with fault injection. + +```go +type MockRegistry struct { + server *httptest.Server + manifests map[string][]byte // ref -> manifest bytes + blobs map[string][]byte // digest -> blob bytes + faults *FaultConfig +} + +type FaultConfig struct { + LatencyPerRequest time.Duration // per-request delay + FailAfterNBytes int64 // disconnect after N bytes (partial response) + DropConnectionRate float64 // random connection drop probability [0,1) + StatusCodeOverride int // force HTTP status (500, 401, 429) + FailOnNthRequest int // fail on Nth request (test retry) + PathFaults map[string]*FaultConfig // per-path overrides +} +``` + +**Implemented OCI endpoints (minimal subset used by oras-go):** +- `GET /v2/` — ping +- `HEAD|GET /v2//manifests/` — manifest operations +- `PUT /v2//manifests/` — push manifest +- `HEAD|GET /v2//blobs/` — blob operations +- `POST /v2//blobs/uploads/` — start upload +- `PUT /v2//blobs/uploads/` — complete upload + +**API:** +```go +func NewMockRegistry() *MockRegistry +func (r *MockRegistry) WithFault(f *FaultConfig) *MockRegistry +func (r *MockRegistry) AddManifest(ref string, manifest []byte) *MockRegistry +func (r *MockRegistry) AddBlob(digest string, content []byte) *MockRegistry +func (r *MockRegistry) Host() string +func (r *MockRegistry) Close() +func (r *MockRegistry) RequestCount() int +func (r *MockRegistry) RequestCountByPath(path string) int +``` + +Self-tested in `test/helpers/mockregistry_test.go`. + +### Resource Tracking Utilities (`test/helpers/tracking.go`) + +```go +type TrackingReadCloser struct { + io.ReadCloser + closed atomic.Bool +} + +func NewTrackingReadCloser(rc io.ReadCloser) *TrackingReadCloser +func (t *TrackingReadCloser) Close() error +func (t *TrackingReadCloser) WasClosed() bool +``` + +Used to assert resources are properly closed on all code paths. + +## Test Dimensions & Scenarios + +### Dimension 1: Functional Correctness (no build tag) + +**File: `pkg/modelfile/modelfile_integration_test.go`** + +| Test | Description | +|------|-------------| +| `TestIntegration_ExcludePatterns_SinglePattern` | `--exclude *.log` removes .log files from generated Modelfile | +| `TestIntegration_ExcludePatterns_MultiplePatterns` | `--exclude *.log --exclude checkpoints/*` removes both | +| `TestIntegration_ExcludePatterns_NestedDirExclude` | `--exclude data/` skips entire directory tree | +| `TestIntegration_ContentRoundTrip` | workspace -> NewModelfileByWorkspace -> Content() -> NewModelfile parse back, verify equivalence | +| `TestIntegration_ConfigJson_Malformed` | Malformed config.json doesn't crash, metadata fields empty | +| `TestIntegration_ConfigJson_PartialFields` | config.json with only `model_type`, other fields stay empty | +| `TestIntegration_ConfigJson_ConflictingValues` | config.json and generation_config.json have different `torch_dtype`, last-wins behavior | + +**File: `pkg/backend/pull_integration_test.go`** + +| Test | Description | +|------|-------------| +| `TestIntegration_Pull_HappyPath` | Pull manifest + blobs from mock registry, verify stored correctly | +| `TestIntegration_Pull_BlobAlreadyExists` | Pre-populate local blob, pull skips it | +| `TestIntegration_Pull_ConcurrentLayers` | 5 blobs in parallel, all stored correctly | + +**File: `pkg/backend/push_integration_test.go`** + +| Test | Description | +|------|-------------| +| `TestIntegration_Push_HappyPath` | Push manifest + blobs to mock registry, verify received | +| `TestIntegration_Push_BlobAlreadyExists` | Mock registry reports blob exists, push skips re-upload and re-tags | + +**File: `cmd/modelfile/generate_integration_test.go`** + +| Test | Description | +|------|-------------| +| `TestIntegration_CLI_Generate_BasicFlags` | CLI with `--name`, `--arch`, `--family` produces correct Modelfile | +| `TestIntegration_CLI_Generate_OutputAndOverwrite` | `--output` writes to specified dir; without `--overwrite` fails if exists | +| `TestIntegration_CLI_Generate_MutualExclusion` | `--model-url` with path arg returns error | + +### Dimension 2: Network Errors (no build tag) + +**File: `pkg/backend/pull_integration_test.go`** + +| Test | Description | +|------|-------------| +| `TestIntegration_Pull_RetryOnTransientError` | `FailOnNthRequest: 2`, third request succeeds; verify retry works | +| `TestIntegration_Pull_RetryExhausted` | `StatusCodeOverride: 500` on all requests; verify clean error after retries | +| `TestIntegration_Pull_AuthError_NoRetry` | `StatusCodeOverride: 401`; verify immediate failure, no retry | +| `TestIntegration_Pull_RateLimited` | `StatusCodeOverride: 429`; verify backoff retry behavior | +| `TestIntegration_Pull_ContextTimeout` | `LatencyPerRequest: 30s` + short context deadline; verify context cancelled error | +| `TestIntegration_Pull_PartialResponse` | `FailAfterNBytes: 1024`; verify error propagation, no corrupted data stored | +| `TestIntegration_Pull_ManifestOK_BlobFails` | `PathFaults` on specific blob digest; verify error mentions the failed digest | + +**File: `pkg/backend/push_integration_test.go`** + +| Test | Description | +|------|-------------| +| `TestIntegration_Push_RetryOnTransientError` | Transient 500 on blob push, verify retry succeeds | +| `TestIntegration_Push_ManifestPushFails` | Manifest endpoint returns 500; verify error after blob push succeeds | + +### Dimension 3: Resource Leak Detection (no build tag) + +**File: `pkg/backend/push_integration_test.go`** + +| Test | Description | +|------|-------------| +| `TestKnownBug_Push_ReadCloserLeak` | Push with blob upload failure; TrackingReadCloser asserts Close() called. **Known bug: push.go:175 does not close ReadCloser on error path.** | + +**File: `pkg/backend/pull_integration_test.go`** + +| Test | Description | +|------|-------------| +| `TestIntegration_Pull_ReaderClosedOnError` | Mock registry returns error mid-blob; verify manifest and blob readers all closed | +| `TestIntegration_Pull_TempDirCleanup` | Pull with download to temp dir then error; verify temp dir removed | + +### Dimension 4: Concurrency Safety (no build tag) + +Run with `go test -race`. + +**File: `internal/pb/pb_integration_test.go`** + +| Test | Description | +|------|-------------| +| `TestKnownBug_DisableProgress_DataRace` | Concurrent `SetDisableProgress()` + `Add()` from multiple goroutines. **Known bug: pb.go global flag has no atomic protection.** | +| `TestIntegration_ProgressBar_ConcurrentUpdates` | Multiple goroutines calling `Add()`, `Abort()`, `Complete()` simultaneously; no panic | + +**File: `pkg/backend/pull_integration_test.go`** + +| Test | Description | +|------|-------------| +| `TestIntegration_Pull_ConcurrentPartialFailure` | 5 blobs, 2 fail; verify errgroup cancels remaining, no goroutine hang | +| `TestIntegration_Pull_ConcurrentContextCancel` | Cancel context during concurrent pull; verify all goroutines exit | + +### Dimension 5: Stress Tests (`//go:build stress`) + +**File: `pkg/modelfile/modelfile_stress_test.go`** + +| Test | Description | +|------|-------------| +| `TestStress_NearMaxFileCount` | 2040 files (near MaxWorkspaceFileCount=2048); verify success | +| `TestStress_DeeplyNestedDirs` | 100-level nested directory; verify no stack overflow | + +**File: `pkg/backend/pull_integration_test.go` (stress-tagged section)** + +| Test | Description | +|------|-------------| +| `TestStress_Pull_ManyLayers` | 50 concurrent blobs; verify errgroup scheduling, no deadlock | +| `TestStress_Pull_RepeatedCycles` | 100x pull loop; verify goroutine count stays stable via `runtime.NumGoroutine()` | + +### Dimension 6: Data Integrity (no build tag) + +**File: `pkg/backend/pull_integration_test.go`** + +| Test | Description | +|------|-------------| +| `TestIntegration_Pull_TruncatedBlob` | Mock returns fewer bytes than Content-Length; verify digest validation catches it | +| `TestIntegration_Pull_CorruptedBlob` | Mock returns wrong bytes; verify digest mismatch error | + +**File: `pkg/backend/push_integration_test.go`** + +| Test | Description | +|------|-------------| +| `TestIntegration_Push_VerifyBlobIntegrity` | After push, verify mock registry received exact bytes matching source digest | + +### Dimension 7: Graceful Shutdown (no build tag) + +**File: `pkg/backend/pull_integration_test.go`** + +| Test | Description | +|------|-------------| +| `TestIntegration_Pull_ContextCancelMidDownload` | Cancel context while blob download in progress; verify goroutines exit, temp files cleaned | + +**File: `pkg/backend/push_integration_test.go`** + +| Test | Description | +|------|-------------| +| `TestIntegration_Push_ContextCancelMidUpload` | Cancel context during blob upload; verify clean exit | + +### Dimension 8: Idempotency & Config Boundary (no build tag) + +**File: `pkg/backend/pull_integration_test.go`** + +| Test | Description | +|------|-------------| +| `TestIntegration_Pull_Idempotent` | Pull twice; second pull makes zero blob fetches | + +**File: `pkg/backend/push_integration_test.go`** + +| Test | Description | +|------|-------------| +| `TestIntegration_Push_Idempotent` | Push twice; second push skips all blobs | + +**File: `cmd/cli_integration_test.go`** + +| Test | Description | +|------|-------------| +| `TestIntegration_CLI_ConcurrencyZero` | `--concurrency=0` returns validation error | +| `TestIntegration_CLI_ConcurrencyNegative` | `--concurrency=-1` returns validation error | +| `TestIntegration_CLI_ExtractFromRemoteNoDir` | `--extract-from-remote` without `--extract-dir` returns error | + +## Known Bug Tests + +Tests prefixed with `TestKnownBug_` are expected to fail. They document real bugs discovered during analysis: + +| Test | Bug Location | Description | +|------|-------------|-------------| +| `TestKnownBug_Push_ReadCloserLeak` | `push.go:175-189` | PullBlob ReadCloser not closed on error path | +| `TestKnownBug_DisableProgress_DataRace` | `pb.go:33-40` | Global disableProgress flag has no atomic protection | + +These tests serve as regression documentation. Each should reference a tracked GitHub issue in its failure message. + +## File Structure + +``` +test/helpers/ + mockregistry.go + mockregistry_test.go + tracking.go + +pkg/modelfile/ + modelfile_integration_test.go # Dimensions 1 + modelfile_stress_test.go # Dimension 5 (//go:build stress) + +pkg/backend/ + pull_integration_test.go # Dimensions 1,2,3,4,5,6,7,8 + push_integration_test.go # Dimensions 1,2,3,6,7,8 + fetch_integration_test.go # Dimensions 1,2 + +internal/pb/ + pb_integration_test.go # Dimension 4 + +cmd/modelfile/ + generate_integration_test.go # Dimension 1 + +cmd/ + cli_integration_test.go # Dimension 8 +``` + +## Conventions + +- **Naming:** `TestIntegration_*` for normal tests, `TestKnownBug_*` for documented bugs, `TestStress_*` for stress tests +- **Isolation:** Each test creates its own temp dir / mock server; no shared mutable state +- **Build tags:** Fast tests have no tag (`go test ./...`); stress tests require `//go:build stress` +- **Race detection:** All concurrency tests must pass `go test -race` +- **Known bugs:** Let tests fail with descriptive message + issue link; do not use `t.Skip` + +## Dependencies + +No new external dependencies. Uses: +- `net/http/httptest` (stdlib) +- `github.com/stretchr/testify` (already in go.mod) +- `runtime` (stdlib, for goroutine counting in stress tests) +- `sync/atomic` (stdlib, for TrackingReadCloser) From 2f85f2c4003b34d04e14b87245fdd65d02576c88 Mon Sep 17 00:00:00 2001 From: Zhao Chen Date: Tue, 7 Apr 2026 22:27:34 +0800 Subject: [PATCH 02/15] docs: update integration tests spec with review feedback - Known bug tests use reverse assertions (CI stays green) - Retry-dependent network tests moved to //go:build slowtest - Remove untestable pull reader/tempdir leak scenarios - Push leak tests cover both success and error paths - Deduplicate against existing test coverage - Add GitHub issue references (#491, #492, #493, #494) - Add auth-error-retry as known design issue Signed-off-by: Zhao Chen --- .../2026-04-07-integration-tests-design.md | 167 +++++++++++------- 1 file changed, 108 insertions(+), 59 deletions(-) diff --git a/docs/superpowers/specs/2026-04-07-integration-tests-design.md b/docs/superpowers/specs/2026-04-07-integration-tests-design.md index 0da75a77..dc9d1c4e 100644 --- a/docs/superpowers/specs/2026-04-07-integration-tests-design.md +++ b/docs/superpowers/specs/2026-04-07-integration-tests-design.md @@ -6,15 +6,26 @@ Add comprehensive integration tests across modctl to cover functional correctnes ## Scope -8 test dimensions, ~41 test scenarios, covering `pkg/modelfile`, `pkg/backend`, `internal/pb`, and `cmd/` layers. +8 test dimensions, ~35 test scenarios, covering `pkg/modelfile`, `pkg/backend`, `internal/pb`, and `cmd/` layers. **Out of scope:** Security tests (path traversal, TLS, credential handling) — deferred to a future effort. +## Known Bugs Discovered + +The following bugs were found during analysis. Each must be filed as a GitHub issue before implementation begins, and referenced by issue number in the corresponding `TestKnownBug_*` test. + +| Bug | Location | Description | Severity | Issue | +|-----|----------|-------------|----------|-------| +| Push ReadCloser leak | `push.go:175-189` | `src.PullBlob()` returns `io.ReadCloser` that is never explicitly closed — on both success and error paths. The reader is wrapped with `io.NopCloser()` before passing to `dst.Blobs().Push()`, so even when push closes the wrapper, the original content is not closed. | Critical | [#491](https://github.com/modelpack/modctl/issues/491) | +| disableProgress data race | `internal/pb/pb.go:33-40` | Global `disableProgress` bool is written by `SetDisableProgress()` and read by `Add()` without any synchronization (no mutex, no atomic). Triggers data race under concurrent pull/push. | Critical | [#493](https://github.com/modelpack/modctl/issues/493) | +| splitReader goroutine leak | `pkg/backend/build/builder.go:413-430` | `splitReader()` spawns a goroutine that copies to a `MultiWriter` over two `PipeWriter`s. If either pipe reader is abandoned (e.g., interceptor fails), the goroutine blocks indefinitely on write. No context-based cancellation. | Major | [#492](https://github.com/modelpack/modctl/issues/492) | +| Auth errors retried unconditionally | `pull.go:116-129`, `retry.go:25-29` | `retry.Do()` retries all errors including 401/403. Auth errors should fail immediately. | Minor | [#494](https://github.com/modelpack/modctl/issues/494) | + ## Test Infrastructure ### Mock OCI Registry (`test/helpers/mockregistry.go`) -A reusable `httptest`-based mock OCI registry server with fault injection. +A reusable `httptest`-based mock OCI registry server with fault injection. Centralizes the pattern already used in `fetch_test.go` into a shared helper. ```go type MockRegistry struct { @@ -69,29 +80,28 @@ func (t *TrackingReadCloser) Close() error func (t *TrackingReadCloser) WasClosed() bool ``` -Used to assert resources are properly closed on all code paths. +Used in push leak tests where mock Storage's `PullBlob()` can return a tracked reader. ## Test Dimensions & Scenarios ### Dimension 1: Functional Correctness (no build tag) +Existing coverage considered: +- `fetch_test.go` already has 6 httptest scenarios for fetch — no new fetch happy path tests needed. +- `pkg/modelfile/modelfile_test.go` covers config.json parsing, content generation, workspace walking — only `ExcludePatterns` integration and CLI-level tests are new. + **File: `pkg/modelfile/modelfile_integration_test.go`** | Test | Description | |------|-------------| -| `TestIntegration_ExcludePatterns_SinglePattern` | `--exclude *.log` removes .log files from generated Modelfile | -| `TestIntegration_ExcludePatterns_MultiplePatterns` | `--exclude *.log --exclude checkpoints/*` removes both | -| `TestIntegration_ExcludePatterns_NestedDirExclude` | `--exclude data/` skips entire directory tree | -| `TestIntegration_ContentRoundTrip` | workspace -> NewModelfileByWorkspace -> Content() -> NewModelfile parse back, verify equivalence | -| `TestIntegration_ConfigJson_Malformed` | Malformed config.json doesn't crash, metadata fields empty | -| `TestIntegration_ConfigJson_PartialFields` | config.json with only `model_type`, other fields stay empty | -| `TestIntegration_ConfigJson_ConflictingValues` | config.json and generation_config.json have different `torch_dtype`, last-wins behavior | +| `TestIntegration_ExcludePatterns_SinglePattern` | `ExcludePatterns: ["*.log"]` through `NewModelfileByWorkspace` removes .log files | +| `TestIntegration_ExcludePatterns_MultiplePatterns` | `ExcludePatterns: ["*.log", "checkpoints/*"]` removes both | **File: `pkg/backend/pull_integration_test.go`** | Test | Description | |------|-------------| -| `TestIntegration_Pull_HappyPath` | Pull manifest + blobs from mock registry, verify stored correctly | +| `TestIntegration_Pull_HappyPath` | Pull manifest + blobs from mock registry via httptest, verify stored correctly | | `TestIntegration_Pull_BlobAlreadyExists` | Pre-populate local blob, pull skips it | | `TestIntegration_Pull_ConcurrentLayers` | 5 blobs in parallel, all stored correctly | @@ -99,7 +109,7 @@ Used to assert resources are properly closed on all code paths. | Test | Description | |------|-------------| -| `TestIntegration_Push_HappyPath` | Push manifest + blobs to mock registry, verify received | +| `TestIntegration_Push_HappyPath` | Push manifest + blobs to mock registry via httptest, verify received | | `TestIntegration_Push_BlobAlreadyExists` | Mock registry reports blob exists, push skips re-upload and re-tags | **File: `cmd/modelfile/generate_integration_test.go`** @@ -110,62 +120,83 @@ Used to assert resources are properly closed on all code paths. | `TestIntegration_CLI_Generate_OutputAndOverwrite` | `--output` writes to specified dir; without `--overwrite` fails if exists | | `TestIntegration_CLI_Generate_MutualExclusion` | `--model-url` with path arg returns error | -### Dimension 2: Network Errors (no build tag) +### Dimension 2: Network Errors -**File: `pkg/backend/pull_integration_test.go`** +Existing coverage considered: +- `retry_test.go` tests the retry-go library in isolation (zero-delay, pure function). New tests cover retry behavior integrated with real HTTP through pull/push functions. +- `fetch_test.go` already covers error references and non-matching patterns. No new fetch error tests needed — pull/push cover the same retry.Do() codepath. + +**Split:** Fast tests (context-based, no retry wait) run by default. Slow tests (real retry backoff) require `//go:build slowtest`. + +**Fast tests (no build tag) — File: `pkg/backend/pull_integration_test.go`** | Test | Description | |------|-------------| -| `TestIntegration_Pull_RetryOnTransientError` | `FailOnNthRequest: 2`, third request succeeds; verify retry works | -| `TestIntegration_Pull_RetryExhausted` | `StatusCodeOverride: 500` on all requests; verify clean error after retries | -| `TestIntegration_Pull_AuthError_NoRetry` | `StatusCodeOverride: 401`; verify immediate failure, no retry | -| `TestIntegration_Pull_RateLimited` | `StatusCodeOverride: 429`; verify backoff retry behavior | -| `TestIntegration_Pull_ContextTimeout` | `LatencyPerRequest: 30s` + short context deadline; verify context cancelled error | +| `TestIntegration_Pull_ContextTimeout` | `LatencyPerRequest: 30s` + short context deadline (100ms); verify context cancelled, no retry wait | | `TestIntegration_Pull_PartialResponse` | `FailAfterNBytes: 1024`; verify error propagation, no corrupted data stored | | `TestIntegration_Pull_ManifestOK_BlobFails` | `PathFaults` on specific blob digest; verify error mentions the failed digest | -**File: `pkg/backend/push_integration_test.go`** +**Fast tests (no build tag) — File: `pkg/backend/push_integration_test.go`** | Test | Description | |------|-------------| -| `TestIntegration_Push_RetryOnTransientError` | Transient 500 on blob push, verify retry succeeds | -| `TestIntegration_Push_ManifestPushFails` | Manifest endpoint returns 500; verify error after blob push succeeds | +| `TestIntegration_Push_ManifestPushFails` | Manifest endpoint returns 500; verify error surfaces | -### Dimension 3: Resource Leak Detection (no build tag) +**Slow tests (`//go:build slowtest`) — File: `pkg/backend/pull_slowtest_test.go`** -**File: `pkg/backend/push_integration_test.go`** +Note: Current implementation applies `retry.Do(...)` unconditionally (`pull.go:116-129`) with 6 attempts, 5s initial delay, 60s max (`retry.go:25-29`). Auth errors (401) ARE retried — this is a design issue documented as a known-bug test. | Test | Description | |------|-------------| -| `TestKnownBug_Push_ReadCloserLeak` | Push with blob upload failure; TrackingReadCloser asserts Close() called. **Known bug: push.go:175 does not close ReadCloser on error path.** | +| `TestSlow_Pull_RetryOnTransientError` | `FailOnNthRequest: 2`, third succeeds; verify retry works | +| `TestSlow_Pull_RetryExhausted` | `StatusCodeOverride: 500` all requests; verify clean error after 6 retries | +| `TestSlow_Pull_RateLimited` | `StatusCodeOverride: 429`; verify backoff retry | +| `TestKnownBug_Pull_AuthErrorStillRetries` | `StatusCodeOverride: 401`; assert request count == 6 (documents: auth errors should not retry). Reverse assertion — passes now, fails when fixed. | -**File: `pkg/backend/pull_integration_test.go`** +**Slow tests (`//go:build slowtest`) — File: `pkg/backend/push_slowtest_test.go`** + +| Test | Description | +|------|-------------| +| `TestSlow_Push_RetryOnTransientError` | Transient 500 on blob push, verify retry succeeds | + +### Dimension 3: Resource Leak Detection (no build tag) + +Existing coverage: None. + +Scope adjustment: Pull reader/tempdir leak tests removed — Pull() does not create temp dirs (`pull.go:61-170`), and blob readers are created inside oras-go internals where TrackingReadCloser cannot be injected in black-box tests. Push leak tests ARE feasible because `src.PullBlob()` goes through mock Storage, allowing TrackingReadCloser injection. + +**File: `pkg/backend/push_integration_test.go`** | Test | Description | |------|-------------| -| `TestIntegration_Pull_ReaderClosedOnError` | Mock registry returns error mid-blob; verify manifest and blob readers all closed | -| `TestIntegration_Pull_TempDirCleanup` | Pull with download to temp dir then error; verify temp dir removed | +| `TestKnownBug_Push_ReadCloserNotClosed_SuccessPath` | Push succeeds; TrackingReadCloser from mock PullBlob asserts Close() was NOT called (reverse assertion — documents the bug on success path). | +| `TestKnownBug_Push_ReadCloserNotClosed_ErrorPath` | Push fails (blob upload error); same assertion. Documents leak on error path. | + +Both tests use reverse assertions: they pass today (asserting the leak exists). When the bug is fixed, they fail, prompting the developer to flip the assertion and remove the `KnownBug` prefix. ### Dimension 4: Concurrency Safety (no build tag) +Existing coverage: Zero concurrency tests anywhere in the project. + Run with `go test -race`. **File: `internal/pb/pb_integration_test.go`** | Test | Description | |------|-------------| -| `TestKnownBug_DisableProgress_DataRace` | Concurrent `SetDisableProgress()` + `Add()` from multiple goroutines. **Known bug: pb.go global flag has no atomic protection.** | -| `TestIntegration_ProgressBar_ConcurrentUpdates` | Multiple goroutines calling `Add()`, `Abort()`, `Complete()` simultaneously; no panic | +| `TestKnownBug_DisableProgress_DataRace` | Concurrent `SetDisableProgress()` + `Add()` from multiple goroutines. Reverse assertion: run without `-race` and assert the code executes without panic (passes). The real detection is via `go test -race` which will report the data race. | +| `TestIntegration_ProgressBar_ConcurrentUpdates` | Multiple goroutines calling `Add()`, `Abort()`, `Complete()` simultaneously; no panic, no deadlock | **File: `pkg/backend/pull_integration_test.go`** | Test | Description | |------|-------------| -| `TestIntegration_Pull_ConcurrentPartialFailure` | 5 blobs, 2 fail; verify errgroup cancels remaining, no goroutine hang | -| `TestIntegration_Pull_ConcurrentContextCancel` | Cancel context during concurrent pull; verify all goroutines exit | +| `TestIntegration_Pull_ConcurrentPartialFailure` | 5 blobs, 2 fail; verify errgroup cancels remaining, all goroutines exit, no hang | ### Dimension 5: Stress Tests (`//go:build stress`) +Existing coverage: `TestWorkspaceLimits` tests exceeding MaxWorkspaceFileCount (error case). New tests cover near-limit success and other stress vectors. + **File: `pkg/modelfile/modelfile_stress_test.go`** | Test | Description | @@ -173,15 +204,17 @@ Run with `go test -race`. | `TestStress_NearMaxFileCount` | 2040 files (near MaxWorkspaceFileCount=2048); verify success | | `TestStress_DeeplyNestedDirs` | 100-level nested directory; verify no stack overflow | -**File: `pkg/backend/pull_integration_test.go` (stress-tagged section)** +**File: `pkg/backend/pull_stress_test.go`** | Test | Description | |------|-------------| | `TestStress_Pull_ManyLayers` | 50 concurrent blobs; verify errgroup scheduling, no deadlock | -| `TestStress_Pull_RepeatedCycles` | 100x pull loop; verify goroutine count stays stable via `runtime.NumGoroutine()` | +| `TestStress_Pull_RepeatedCycles` | 100x pull loop; verify goroutine count stable via `runtime.NumGoroutine()` | ### Dimension 6: Data Integrity (no build tag) +Existing coverage: None for pull/push data integrity. + **File: `pkg/backend/pull_integration_test.go`** | Test | Description | @@ -197,11 +230,13 @@ Run with `go test -race`. ### Dimension 7: Graceful Shutdown (no build tag) +Existing coverage: `retry_test.go` tests context cancel on retry library only. + **File: `pkg/backend/pull_integration_test.go`** | Test | Description | |------|-------------| -| `TestIntegration_Pull_ContextCancelMidDownload` | Cancel context while blob download in progress; verify goroutines exit, temp files cleaned | +| `TestIntegration_Pull_ContextCancelMidDownload` | Cancel context while concurrent blob download in progress; verify all goroutines exit (covers both graceful shutdown and concurrent cancel) | **File: `pkg/backend/push_integration_test.go`** @@ -211,17 +246,19 @@ Run with `go test -race`. ### Dimension 8: Idempotency & Config Boundary (no build tag) +Existing coverage: None. + **File: `pkg/backend/pull_integration_test.go`** | Test | Description | |------|-------------| -| `TestIntegration_Pull_Idempotent` | Pull twice; second pull makes zero blob fetches | +| `TestIntegration_Pull_Idempotent` | Pull twice; second pull makes zero blob fetches (verified via mock registry request count) | **File: `pkg/backend/push_integration_test.go`** | Test | Description | |------|-------------| -| `TestIntegration_Push_Idempotent` | Push twice; second push skips all blobs | +| `TestIntegration_Push_Idempotent` | Push twice; second push skips all blob uploads | **File: `cmd/cli_integration_test.go`** @@ -231,51 +268,63 @@ Run with `go test -race`. | `TestIntegration_CLI_ConcurrencyNegative` | `--concurrency=-1` returns validation error | | `TestIntegration_CLI_ExtractFromRemoteNoDir` | `--extract-from-remote` without `--extract-dir` returns error | -## Known Bug Tests - -Tests prefixed with `TestKnownBug_` are expected to fail. They document real bugs discovered during analysis: +## Known Bug Test Strategy -| Test | Bug Location | Description | -|------|-------------|-------------| -| `TestKnownBug_Push_ReadCloserLeak` | `push.go:175-189` | PullBlob ReadCloser not closed on error path | -| `TestKnownBug_DisableProgress_DataRace` | `pb.go:33-40` | Global disableProgress flag has no atomic protection | +Tests prefixed with `TestKnownBug_` use **reverse assertions**: they assert the buggy behavior EXISTS, so they pass in CI today. When the bug is fixed, the test fails, prompting the developer to: +1. Flip the assertion (e.g., `assert.False` → `assert.True`) +2. Remove the `KnownBug_` prefix +3. Close the linked GitHub issue -These tests serve as regression documentation. Each should reference a tracked GitHub issue in its failure message. +Example pattern: +```go +func TestKnownBug_Push_ReadCloserNotClosed_SuccessPath(t *testing.T) { + // ... trigger push via mock storage + mock registry ... + + // BUG: content ReadCloser is never explicitly closed. + // See: https://github.com/modelpack/modctl/issues/491 + assert.False(t, tracker.WasClosed(), + "If this fails, the ReadCloser leak has been fixed! "+ + "Flip to assert.True and remove KnownBug prefix.") +} +``` ## File Structure ``` test/helpers/ - mockregistry.go - mockregistry_test.go - tracking.go + mockregistry.go # shared mock OCI registry with fault injection + mockregistry_test.go # self-tests for mock registry + tracking.go # TrackingReadCloser and other resource trackers pkg/modelfile/ - modelfile_integration_test.go # Dimensions 1 - modelfile_stress_test.go # Dimension 5 (//go:build stress) + modelfile_integration_test.go # Dim 1: ExcludePatterns integration (2 tests) + modelfile_stress_test.go # Dim 5: //go:build stress (2 tests) pkg/backend/ - pull_integration_test.go # Dimensions 1,2,3,4,5,6,7,8 - push_integration_test.go # Dimensions 1,2,3,6,7,8 - fetch_integration_test.go # Dimensions 1,2 + pull_integration_test.go # Dim 1,2,4,6,7,8 (10 tests) + push_integration_test.go # Dim 1,2,3,6,7,8 (9 tests) + pull_slowtest_test.go # Dim 2: //go:build slowtest (4 tests) + push_slowtest_test.go # Dim 2: //go:build slowtest (1 test) + pull_stress_test.go # Dim 5: //go:build stress (2 tests) internal/pb/ - pb_integration_test.go # Dimension 4 + pb_integration_test.go # Dim 4: concurrency safety (2 tests) cmd/modelfile/ - generate_integration_test.go # Dimension 1 + generate_integration_test.go # Dim 1: CLI layer (3 tests) cmd/ - cli_integration_test.go # Dimension 8 + cli_integration_test.go # Dim 8: config boundary (3 tests) ``` ## Conventions -- **Naming:** `TestIntegration_*` for normal tests, `TestKnownBug_*` for documented bugs, `TestStress_*` for stress tests +- **Naming:** `TestIntegration_*` for normal, `TestKnownBug_*` for documented bugs (reverse assertion), `TestStress_*` for stress, `TestSlow_*` for retry-dependent - **Isolation:** Each test creates its own temp dir / mock server; no shared mutable state -- **Build tags:** Fast tests have no tag (`go test ./...`); stress tests require `//go:build stress` +- **Build tags:** Default (`go test ./...`) for fast tests; `//go:build slowtest` for retry tests; `//go:build stress` for stress tests - **Race detection:** All concurrency tests must pass `go test -race` -- **Known bugs:** Let tests fail with descriptive message + issue link; do not use `t.Skip` +- **Known bugs:** Reverse assertion pattern; each test references a GitHub issue number in its failure message +- **Issue tracking:** All known bugs filed as GitHub issues before implementation begins ## Dependencies From 667ba0557b44c9c6f2fdadf789e8254049690ed8 Mon Sep 17 00:00:00 2001 From: Zhao Chen Date: Tue, 7 Apr 2026 22:36:55 +0800 Subject: [PATCH 03/15] docs: add integration tests implementation plan 9 tasks covering mock registry infrastructure, resource tracking, modelfile/pull/push/CLI/pb integration tests, slow tests, and stress tests. Each task has TDD-style steps with complete code. Signed-off-by: Zhao Chen --- .../2026-04-07-integration-tests-plan.md | 2222 +++++++++++++++++ 1 file changed, 2222 insertions(+) create mode 100644 docs/superpowers/plans/2026-04-07-integration-tests-plan.md diff --git a/docs/superpowers/plans/2026-04-07-integration-tests-plan.md b/docs/superpowers/plans/2026-04-07-integration-tests-plan.md new file mode 100644 index 00000000..ac91b421 --- /dev/null +++ b/docs/superpowers/plans/2026-04-07-integration-tests-plan.md @@ -0,0 +1,2222 @@ +# Integration Tests Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add ~35 integration tests across modctl covering 8 dimensions (functional correctness, network errors, resource leaks, concurrency safety, stress, data integrity, graceful shutdown, idempotency) without changing any functional code. + +**Architecture:** Shared mock OCI registry (`test/helpers/`) with fault injection serves as remote; mock Storage from `test/mocks/storage/` serves as local. Tests in `*_integration_test.go` files alongside existing code. Known bugs use reverse assertions (pass today, fail when fixed). + +**Tech Stack:** Go stdlib `net/http/httptest`, `github.com/stretchr/testify` (already in go.mod), `test/mocks/storage/storage.go` (already generated) + +**Spec:** `docs/superpowers/specs/2026-04-07-integration-tests-design.md` + +**Known bug issues:** #491 (push ReadCloser leak), #492 (splitReader goroutine leak), #493 (disableProgress data race), #494 (auth error retry) + +--- + +## File Structure + +``` +test/helpers/ + mockregistry.go # CREATE — shared mock OCI registry with fault injection + mockregistry_test.go # CREATE — self-tests for mock registry + tracking.go # CREATE — TrackingReadCloser utility + +pkg/modelfile/ + modelfile_integration_test.go # CREATE — ExcludePatterns tests (2 tests) + modelfile_stress_test.go # CREATE — //go:build stress (2 tests) + +pkg/backend/ + pull_integration_test.go # CREATE — pull tests across dims 1,2,4,6,7,8 (10 tests) + push_integration_test.go # CREATE — push tests across dims 1,2,3,6,7,8 (9 tests) + pull_slowtest_test.go # CREATE — //go:build slowtest (4 tests) + push_slowtest_test.go # CREATE — //go:build slowtest (1 test) + pull_stress_test.go # CREATE — //go:build stress (2 tests) + +internal/pb/ + pb_integration_test.go # CREATE — concurrency tests (2 tests) + +cmd/modelfile/ + generate_integration_test.go # CREATE — CLI generate tests (3 tests) + +cmd/ + cli_integration_test.go # CREATE — config boundary tests (3 tests) +``` + +--- + +### Task 1: Mock OCI Registry + +**Files:** +- Create: `test/helpers/mockregistry.go` +- Create: `test/helpers/mockregistry_test.go` + +This is the foundation for all backend integration tests. Follows the pattern in `pkg/backend/fetch_test.go:54-94` but as a reusable, fault-injectable helper. + +- [ ] **Step 1: Create `test/helpers/mockregistry.go`** + +```go +package helpers + +import ( + "crypto/sha256" + "encoding/json" + "fmt" + "io" + "net/http" + "net/http/httptest" + "strings" + "sync" + "sync/atomic" + "time" + + godigest "github.com/opencontainers/go-digest" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" +) + +// FaultConfig controls fault injection for the mock registry. +type FaultConfig struct { + LatencyPerRequest time.Duration // per-request delay + FailAfterNBytes int64 // disconnect after N bytes written + StatusCodeOverride int // force HTTP status code + FailOnNthRequest int // fail Nth request (1-based), 0 = disabled + PathFaults map[string]*FaultConfig // per-path override (matched by suffix) +} + +// MockRegistry is a test OCI registry backed by httptest.Server. +type MockRegistry struct { + server *httptest.Server + mu sync.RWMutex + manifests map[string][]byte // "repo:ref" -> manifest JSON + blobs map[string][]byte // digest string -> content + + faults *FaultConfig + requestCount atomic.Int64 + pathCounts sync.Map // path string -> *atomic.Int64 + + // uploads tracks in-progress blob uploads + uploads map[string][]byte // upload UUID -> accumulated data + uploadsMu sync.Mutex + uploadSeq atomic.Int64 +} + +// NewMockRegistry creates a mock OCI registry with no faults. +func NewMockRegistry() *MockRegistry { + mr := &MockRegistry{ + manifests: make(map[string][]byte), + blobs: make(map[string][]byte), + uploads: make(map[string][]byte), + } + mr.server = httptest.NewServer(http.HandlerFunc(mr.handler)) + return mr +} + +// WithFault sets the fault configuration. Call before making requests. +func (mr *MockRegistry) WithFault(f *FaultConfig) *MockRegistry { + mr.faults = f + return mr +} + +// AddManifest pre-populates a manifest. ref is "repo:tag". +func (mr *MockRegistry) AddManifest(ref string, manifest ocispec.Manifest) *MockRegistry { + data, _ := json.Marshal(manifest) + mr.mu.Lock() + mr.manifests[ref] = data + mr.mu.Unlock() + return mr +} + +// AddBlob pre-populates a blob by its content. Returns the digest. +func (mr *MockRegistry) AddBlob(content []byte) string { + dgst := godigest.FromBytes(content) + mr.mu.Lock() + mr.blobs[dgst.String()] = content + mr.mu.Unlock() + return dgst.String() +} + +// Host returns the registry host (e.g., "127.0.0.1:PORT") without scheme. +func (mr *MockRegistry) Host() string { + return strings.TrimPrefix(mr.server.URL, "http://") +} + +// Close shuts down the mock server. +func (mr *MockRegistry) Close() { + mr.server.Close() +} + +// RequestCount returns total requests received. +func (mr *MockRegistry) RequestCount() int64 { + return mr.requestCount.Load() +} + +// RequestCountByPath returns request count for a specific path suffix. +func (mr *MockRegistry) RequestCountByPath(pathSuffix string) int64 { + if val, ok := mr.pathCounts.Load(pathSuffix); ok { + return val.(*atomic.Int64).Load() + } + return 0 +} + +// BlobExists checks if a blob was received (useful for push verification). +func (mr *MockRegistry) BlobExists(digest string) bool { + mr.mu.RLock() + defer mr.mu.RUnlock() + _, ok := mr.blobs[digest] + return ok +} + +// GetBlob returns blob content (useful for push integrity verification). +func (mr *MockRegistry) GetBlob(digest string) ([]byte, bool) { + mr.mu.RLock() + defer mr.mu.RUnlock() + data, ok := mr.blobs[digest] + return data, ok +} + +// ManifestExists checks if a manifest ref was received. +func (mr *MockRegistry) ManifestExists(ref string) bool { + mr.mu.RLock() + defer mr.mu.RUnlock() + _, ok := mr.manifests[ref] + return ok +} + +func (mr *MockRegistry) handler(w http.ResponseWriter, r *http.Request) { + mr.requestCount.Add(1) + mr.trackPath(r.URL.Path) + + // Resolve fault config (path-specific overrides global). + fault := mr.resolveFault(r.URL.Path) + + // Apply fault: check Nth request failure. + if fault != nil && fault.FailOnNthRequest > 0 { + count := mr.requestCount.Load() + if int(count) <= fault.FailOnNthRequest { + http.Error(w, "injected fault", http.StatusInternalServerError) + return + } + } + + // Apply fault: latency. + if fault != nil && fault.LatencyPerRequest > 0 { + time.Sleep(fault.LatencyPerRequest) + } + + // Apply fault: status code override. + if fault != nil && fault.StatusCodeOverride > 0 { + http.Error(w, "injected status", fault.StatusCodeOverride) + return + } + + // Route the request. + path := r.URL.Path + switch { + case path == "/v2/" || path == "/v2": + w.WriteHeader(http.StatusOK) + + case strings.Contains(path, "/manifests/"): + mr.handleManifest(w, r, fault) + + case strings.Contains(path, "/blobs/uploads/"): + mr.handleBlobUpload(w, r) + + case strings.Contains(path, "/blobs/"): + mr.handleBlob(w, r, fault) + + default: + http.NotFound(w, r) + } +} + +func (mr *MockRegistry) handleManifest(w http.ResponseWriter, r *http.Request, fault *FaultConfig) { + // Parse: /v2//manifests/ + // repo can contain slashes, ref is the last segment after /manifests/ + parts := strings.SplitN(r.URL.Path, "/manifests/", 2) + if len(parts) != 2 { + http.NotFound(w, r) + return + } + repo := strings.TrimPrefix(parts[0], "/v2/") + ref := parts[1] + key := repo + ":" + ref + + switch r.Method { + case http.MethodGet: + mr.mu.RLock() + data, ok := mr.manifests[key] + mr.mu.RUnlock() + if !ok { + http.NotFound(w, r) + return + } + w.Header().Set("Content-Type", ocispec.MediaTypeImageManifest) + w.Header().Set("Docker-Content-Digest", godigest.FromBytes(data).String()) + w.Header().Set("Content-Length", fmt.Sprintf("%d", len(data))) + mr.writeWithFault(w, data, fault) + + case http.MethodHead: + mr.mu.RLock() + data, ok := mr.manifests[key] + mr.mu.RUnlock() + if !ok { + http.NotFound(w, r) + return + } + w.Header().Set("Content-Type", ocispec.MediaTypeImageManifest) + w.Header().Set("Docker-Content-Digest", godigest.FromBytes(data).String()) + w.Header().Set("Content-Length", fmt.Sprintf("%d", len(data))) + w.WriteHeader(http.StatusOK) + + case http.MethodPut: + body, err := io.ReadAll(r.Body) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + mr.mu.Lock() + mr.manifests[key] = body + // Also store by digest for HEAD lookups + dgst := godigest.FromBytes(body) + mr.manifests[repo+":"+dgst.String()] = body + mr.mu.Unlock() + w.Header().Set("Docker-Content-Digest", dgst.String()) + w.WriteHeader(http.StatusCreated) + + default: + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + } +} + +func (mr *MockRegistry) handleBlob(w http.ResponseWriter, r *http.Request, fault *FaultConfig) { + // Parse: /v2//blobs/ + parts := strings.SplitN(r.URL.Path, "/blobs/", 2) + if len(parts) != 2 { + http.NotFound(w, r) + return + } + digest := parts[1] + + mr.mu.RLock() + data, ok := mr.blobs[digest] + mr.mu.RUnlock() + + switch r.Method { + case http.MethodGet: + if !ok { + http.NotFound(w, r) + return + } + w.Header().Set("Content-Length", fmt.Sprintf("%d", len(data))) + w.Header().Set("Docker-Content-Digest", digest) + mr.writeWithFault(w, data, fault) + + case http.MethodHead: + if !ok { + http.NotFound(w, r) + return + } + w.Header().Set("Content-Length", fmt.Sprintf("%d", len(data))) + w.Header().Set("Docker-Content-Digest", digest) + w.WriteHeader(http.StatusOK) + + default: + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + } +} + +func (mr *MockRegistry) handleBlobUpload(w http.ResponseWriter, r *http.Request) { + switch r.Method { + case http.MethodPost: + // Start upload — return a UUID location. + uuid := fmt.Sprintf("upload-%d", mr.uploadSeq.Add(1)) + mr.uploadsMu.Lock() + mr.uploads[uuid] = nil + mr.uploadsMu.Unlock() + + // Parse repo from path for location header. + parts := strings.SplitN(r.URL.Path, "/blobs/uploads/", 2) + repo := strings.TrimPrefix(parts[0], "/v2/") + location := fmt.Sprintf("/v2/%s/blobs/uploads/%s", repo, uuid) + w.Header().Set("Location", location) + w.Header().Set("Docker-Upload-UUID", uuid) + w.WriteHeader(http.StatusAccepted) + + case http.MethodPut: + // Complete upload — read body, store as blob. + // Path: /v2//blobs/uploads/?digest= + parts := strings.SplitN(r.URL.Path, "/blobs/uploads/", 2) + if len(parts) != 2 { + http.Error(w, "bad upload path", http.StatusBadRequest) + return + } + + body, err := io.ReadAll(r.Body) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + digest := r.URL.Query().Get("digest") + if digest == "" { + // Compute digest from body. + h := sha256.Sum256(body) + digest = fmt.Sprintf("sha256:%x", h) + } + + mr.mu.Lock() + mr.blobs[digest] = body + mr.mu.Unlock() + + w.Header().Set("Docker-Content-Digest", digest) + w.WriteHeader(http.StatusCreated) + + case http.MethodPatch: + // Chunked upload — accumulate data. + parts := strings.SplitN(r.URL.Path, "/blobs/uploads/", 2) + if len(parts) != 2 { + http.Error(w, "bad upload path", http.StatusBadRequest) + return + } + uuid := parts[1] + + body, err := io.ReadAll(r.Body) + if err != nil { + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + mr.uploadsMu.Lock() + mr.uploads[uuid] = append(mr.uploads[uuid], body...) + mr.uploadsMu.Unlock() + + w.Header().Set("Location", r.URL.Path) + w.WriteHeader(http.StatusAccepted) + + default: + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + } +} + +func (mr *MockRegistry) writeWithFault(w http.ResponseWriter, data []byte, fault *FaultConfig) { + if fault != nil && fault.FailAfterNBytes > 0 && fault.FailAfterNBytes < int64(len(data)) { + // Write partial data then close connection. + w.Write(data[:fault.FailAfterNBytes]) + // Hijack to force-close the connection. + if hj, ok := w.(http.Hijacker); ok { + conn, _, _ := hj.Hijack() + if conn != nil { + conn.Close() + } + } + return + } + w.Write(data) +} + +func (mr *MockRegistry) resolveFault(path string) *FaultConfig { + if mr.faults == nil { + return nil + } + // Check path-specific faults first. + if mr.faults.PathFaults != nil { + for suffix, pf := range mr.faults.PathFaults { + if strings.HasSuffix(path, suffix) { + return pf + } + } + } + return mr.faults +} + +func (mr *MockRegistry) trackPath(path string) { + val, _ := mr.pathCounts.LoadOrStore(path, &atomic.Int64{}) + val.(*atomic.Int64).Add(1) +} +``` + +- [ ] **Step 2: Write self-tests in `test/helpers/mockregistry_test.go`** + +```go +package helpers + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "testing" + "time" + + godigest "github.com/opencontainers/go-digest" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestMockRegistry_PingAndBlobRoundTrip(t *testing.T) { + mr := NewMockRegistry() + defer mr.Close() + + // Ping. + resp, err := http.Get(fmt.Sprintf("http://%s/v2/", mr.Host())) + require.NoError(t, err) + assert.Equal(t, http.StatusOK, resp.StatusCode) + resp.Body.Close() + + // Add and fetch blob. + content := []byte("hello blob") + digest := mr.AddBlob(content) + + resp, err = http.Get(fmt.Sprintf("http://%s/v2/test/repo/blobs/%s", mr.Host(), digest)) + require.NoError(t, err) + defer resp.Body.Close() + assert.Equal(t, http.StatusOK, resp.StatusCode) + body, _ := io.ReadAll(resp.Body) + assert.Equal(t, content, body) +} + +func TestMockRegistry_ManifestRoundTrip(t *testing.T) { + mr := NewMockRegistry() + defer mr.Close() + + manifest := ocispec.Manifest{ + MediaType: ocispec.MediaTypeImageManifest, + Layers: []ocispec.Descriptor{ + {Digest: godigest.FromString("layer1"), Size: 6}, + }, + } + mr.AddManifest("test/repo:latest", manifest) + + resp, err := http.Get(fmt.Sprintf("http://%s/v2/test/repo/manifests/latest", mr.Host())) + require.NoError(t, err) + defer resp.Body.Close() + assert.Equal(t, http.StatusOK, resp.StatusCode) + + var got ocispec.Manifest + require.NoError(t, json.NewDecoder(resp.Body).Decode(&got)) + assert.Len(t, got.Layers, 1) +} + +func TestMockRegistry_FaultStatusCodeOverride(t *testing.T) { + mr := NewMockRegistry() + defer mr.Close() + mr.WithFault(&FaultConfig{StatusCodeOverride: http.StatusInternalServerError}) + + resp, err := http.Get(fmt.Sprintf("http://%s/v2/", mr.Host())) + require.NoError(t, err) + resp.Body.Close() + assert.Equal(t, http.StatusInternalServerError, resp.StatusCode) +} + +func TestMockRegistry_FaultFailOnNthRequest(t *testing.T) { + mr := NewMockRegistry() + defer mr.Close() + mr.WithFault(&FaultConfig{FailOnNthRequest: 2}) + + url := fmt.Sprintf("http://%s/v2/", mr.Host()) + + // Requests 1 and 2 should fail (count <= N). + resp, _ := http.Get(url) + resp.Body.Close() + assert.Equal(t, http.StatusInternalServerError, resp.StatusCode) + + resp, _ = http.Get(url) + resp.Body.Close() + assert.Equal(t, http.StatusInternalServerError, resp.StatusCode) + + // Request 3 should succeed. + resp, _ = http.Get(url) + resp.Body.Close() + assert.Equal(t, http.StatusOK, resp.StatusCode) +} + +func TestMockRegistry_FaultLatency(t *testing.T) { + mr := NewMockRegistry() + defer mr.Close() + mr.WithFault(&FaultConfig{LatencyPerRequest: 100 * time.Millisecond}) + + start := time.Now() + resp, err := http.Get(fmt.Sprintf("http://%s/v2/", mr.Host())) + require.NoError(t, err) + resp.Body.Close() + assert.GreaterOrEqual(t, time.Since(start), 80*time.Millisecond) +} + +func TestMockRegistry_RequestCounting(t *testing.T) { + mr := NewMockRegistry() + defer mr.Close() + + url := fmt.Sprintf("http://%s/v2/", mr.Host()) + for i := 0; i < 3; i++ { + resp, _ := http.Get(url) + resp.Body.Close() + } + + assert.Equal(t, int64(3), mr.RequestCount()) +} + +func TestMockRegistry_BlobUploadRoundTrip(t *testing.T) { + mr := NewMockRegistry() + defer mr.Close() + + // POST to start upload. + resp, err := http.Post( + fmt.Sprintf("http://%s/v2/test/repo/blobs/uploads/", mr.Host()), + "application/octet-stream", nil, + ) + require.NoError(t, err) + assert.Equal(t, http.StatusAccepted, resp.StatusCode) + location := resp.Header.Get("Location") + resp.Body.Close() + require.NotEmpty(t, location) + + // PUT to complete upload. + content := []byte("uploaded blob data") + dgst := godigest.FromBytes(content) + uploadURL := fmt.Sprintf("http://%s%s?digest=%s", mr.Host(), location, dgst.String()) + + req, _ := http.NewRequest(http.MethodPut, uploadURL, io.NopCloser( + io.NewSectionReader( + readerAt(content), 0, int64(len(content)), + ), + )) + // Simpler: just use bytes.NewReader + req, _ = http.NewRequest(http.MethodPut, uploadURL, bytesReader(content)) + resp, err = http.DefaultClient.Do(req) + require.NoError(t, err) + resp.Body.Close() + assert.Equal(t, http.StatusCreated, resp.StatusCode) + + // Verify blob exists. + assert.True(t, mr.BlobExists(dgst.String())) + data, ok := mr.GetBlob(dgst.String()) + assert.True(t, ok) + assert.Equal(t, content, data) +} + +// bytesReader is a helper to create a bytes.Reader. +func bytesReader(data []byte) io.Reader { + return io.NopCloser(ioReader(data)) +} + +// These are defined to avoid importing bytes in the test +// (the real implementation will use bytes.NewReader) +type readerAtImpl struct{ data []byte } + +func (r readerAtImpl) ReadAt(p []byte, off int64) (int, error) { + copy(p, r.data[off:]) + n := len(r.data) - int(off) + if n > len(p) { + n = len(p) + } + return n, io.EOF +} +func readerAt(data []byte) io.ReaderAt { return readerAtImpl{data} } +func ioReader(data []byte) io.Reader { + return &ioReaderImpl{data: data} +} + +type ioReaderImpl struct { + data []byte + pos int +} + +func (r *ioReaderImpl) Read(p []byte) (int, error) { + if r.pos >= len(r.data) { + return 0, io.EOF + } + n := copy(p, r.data[r.pos:]) + r.pos += n + return n, nil +} +``` + +Note: The upload test uses some helper functions to avoid unnecessary imports. The actual implementation should use `bytes.NewReader` directly. Clean up the helper functions and use `bytes.NewReader` and `bytes.NewBuffer` instead. + +- [ ] **Step 3: Run self-tests** + +Run: `cd /Users/zhaochen/modelpack/modctl/.claude/worktrees/silly-leaping-pelican && go test ./test/helpers/ -v -count=1` +Expected: All tests PASS. + +- [ ] **Step 4: Commit** + +```bash +git add test/helpers/mockregistry.go test/helpers/mockregistry_test.go +git commit -s -m "test: add shared mock OCI registry with fault injection" +``` + +--- + +### Task 2: Resource Tracking Utilities + +**Files:** +- Create: `test/helpers/tracking.go` + +- [ ] **Step 1: Create `test/helpers/tracking.go`** + +```go +package helpers + +import ( + "io" + "sync/atomic" + "testing" + + "github.com/stretchr/testify/assert" +) + +// TrackingReadCloser wraps an io.ReadCloser and records whether Close() was called. +type TrackingReadCloser struct { + io.ReadCloser + closed atomic.Bool +} + +// NewTrackingReadCloser wraps rc with close-tracking. +func NewTrackingReadCloser(rc io.ReadCloser) *TrackingReadCloser { + return &TrackingReadCloser{ReadCloser: rc} +} + +// Close marks the closer as closed and delegates to the underlying closer. +func (t *TrackingReadCloser) Close() error { + t.closed.Store(true) + return t.ReadCloser.Close() +} + +// WasClosed returns true if Close() was called. +func (t *TrackingReadCloser) WasClosed() bool { + return t.closed.Load() +} + +// AssertClosed asserts Close() was called. +func (t *TrackingReadCloser) AssertClosed(tb testing.TB) { + tb.Helper() + assert.True(tb, t.closed.Load(), "ReadCloser was not closed") +} + +// AssertNotClosed asserts Close() was NOT called (for reverse assertions on known bugs). +func (t *TrackingReadCloser) AssertNotClosed(tb testing.TB) { + tb.Helper() + assert.False(tb, t.closed.Load(), "ReadCloser was unexpectedly closed — bug may be fixed!") +} +``` + +- [ ] **Step 2: Run compilation check** + +Run: `go build ./test/helpers/` +Expected: No errors. + +- [ ] **Step 3: Commit** + +```bash +git add test/helpers/tracking.go +git commit -s -m "test: add TrackingReadCloser utility for resource leak detection" +``` + +--- + +### Task 3: Modelfile Exclude Integration Tests + +**Files:** +- Create: `pkg/modelfile/modelfile_integration_test.go` + +Existing unit tests in `modelfile_test.go` cover config.json parsing, content generation, hidden files, and workspace validation. These tests cover the **only untested integration path**: `ExcludePatterns` field flowing through `GenerateConfig` → `NewModelfileByWorkspace` → `PathFilter` → workspace walk. + +- [ ] **Step 1: Write tests in `pkg/modelfile/modelfile_integration_test.go`** + +```go +package modelfile + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + configmodelfile "github.com/modelpack/modctl/pkg/config/modelfile" +) + +func TestIntegration_ExcludePatterns_SinglePattern(t *testing.T) { + tempDir := t.TempDir() + + // Create workspace with model files and log files. + files := map[string]string{ + "model.bin": "model data", + "config.json": `{"model_type": "test"}`, + "train.log": "training log", + "eval.log": "eval log", + "run.py": "code", + } + for name, content := range files { + require.NoError(t, os.WriteFile(filepath.Join(tempDir, name), []byte(content), 0644)) + } + + config := &configmodelfile.GenerateConfig{ + Workspace: tempDir, + Name: "exclude-test", + ExcludePatterns: []string{"*.log"}, + } + + mf, err := NewModelfileByWorkspace(tempDir, config) + require.NoError(t, err) + + // Collect all files in the modelfile. + allFiles := append(append(append(mf.GetConfigs(), mf.GetModels()...), mf.GetCodes()...), mf.GetDocs()...) + + // .log files should be excluded. + for _, f := range allFiles { + assert.NotContains(t, f, ".log", "excluded file %s should not appear", f) + } + + // model.bin, config.json, run.py should still be present. + assert.Contains(t, mf.GetModels(), "model.bin") + assert.Contains(t, mf.GetConfigs(), "config.json") + assert.Contains(t, mf.GetCodes(), "run.py") +} + +func TestIntegration_ExcludePatterns_MultiplePatterns(t *testing.T) { + tempDir := t.TempDir() + + // Create workspace with various file types and a checkpoints directory. + dirs := []string{"checkpoints", "src"} + for _, d := range dirs { + require.NoError(t, os.MkdirAll(filepath.Join(tempDir, d), 0755)) + } + + files := map[string]string{ + "model.bin": "model", + "config.json": `{"model_type": "test"}`, + "debug.log": "log", + "checkpoints/step100.bin": "ckpt", + "src/train.py": "code", + } + for name, content := range files { + require.NoError(t, os.WriteFile(filepath.Join(tempDir, name), []byte(content), 0644)) + } + + config := &configmodelfile.GenerateConfig{ + Workspace: tempDir, + Name: "multi-exclude-test", + ExcludePatterns: []string{"*.log", "checkpoints/*"}, + } + + mf, err := NewModelfileByWorkspace(tempDir, config) + require.NoError(t, err) + + allFiles := append(append(append(mf.GetConfigs(), mf.GetModels()...), mf.GetCodes()...), mf.GetDocs()...) + + // .log files and checkpoints/* should be excluded. + for _, f := range allFiles { + assert.NotContains(t, f, ".log") + assert.NotContains(t, f, "checkpoints/") + } + + // Remaining files should be present. + assert.Contains(t, mf.GetModels(), "model.bin") + assert.Contains(t, mf.GetCodes(), "src/train.py") +} +``` + +- [ ] **Step 2: Run tests** + +Run: `go test ./pkg/modelfile/ -run TestIntegration_ExcludePatterns -v -count=1` +Expected: PASS. + +- [ ] **Step 3: Commit** + +```bash +git add pkg/modelfile/modelfile_integration_test.go +git commit -s -m "test: add integration tests for ExcludePatterns through NewModelfileByWorkspace" +``` + +--- + +### Task 4: ProgressBar Concurrency Tests + +**Files:** +- Create: `internal/pb/pb_integration_test.go` + +`internal/pb/` has zero tests. These test concurrency safety. + +- [ ] **Step 1: Write tests in `internal/pb/pb_integration_test.go`** + +```go +package pb + +import ( + "io" + "strings" + "sync" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestKnownBug_DisableProgress_DataRace(t *testing.T) { + // This test documents the data race in SetDisableProgress/Add. + // Run with: go test -race ./internal/pb/ -run TestKnownBug_DisableProgress_DataRace + // + // Known bug: global disableProgress bool has no atomic protection. + // See: https://github.com/modelpack/modctl/issues/493 + // + // When the bug is fixed (atomic.Bool), this test will still pass + // AND the -race detector will stop reporting the race. + // At that point, remove the KnownBug prefix. + + var wg sync.WaitGroup + pb := NewProgressBar(io.Discard) + pb.Start() + defer pb.Stop() + + // Concurrent SetDisableProgress. + for i := 0; i < 10; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for j := 0; j < 100; j++ { + SetDisableProgress(j%2 == 0) + } + }() + } + + // Concurrent Add. + for i := 0; i < 10; i++ { + wg.Add(1) + go func(id int) { + defer wg.Done() + for j := 0; j < 100; j++ { + reader := strings.NewReader("test data") + pb.Add("test", "bar-"+string(rune('a'+id)), 9, reader) + } + }(i) + } + + wg.Wait() + // If we get here without panic, the test passes. + // The real detection is via -race flag. +} + +func TestIntegration_ProgressBar_ConcurrentUpdates(t *testing.T) { + pb := NewProgressBar(io.Discard) + pb.Start() + defer pb.Stop() + + // Disable progress to avoid mpb rendering issues in test. + SetDisableProgress(true) + defer SetDisableProgress(false) + + var wg sync.WaitGroup + + // Concurrent Add + Complete + Abort from multiple goroutines. + for i := 0; i < 20; i++ { + wg.Add(1) + go func(id int) { + defer wg.Done() + name := string(rune('a' + id)) + reader := strings.NewReader("data") + pb.Add("prompt", name, 4, reader) + if id%3 == 0 { + pb.Complete(name, "done") + } else if id%3 == 1 { + pb.Abort(name, assert.AnError) + } + // id%3 == 2: just leave it + }(i) + } + + wg.Wait() + // No panic = success. +} +``` + +- [ ] **Step 2: Run tests (without race detector first)** + +Run: `go test ./internal/pb/ -run 'TestKnownBug_DisableProgress|TestIntegration_ProgressBar' -v -count=1` +Expected: PASS (both tests complete without panic). + +- [ ] **Step 3: Run with race detector to document the known bug** + +Run: `go test ./internal/pb/ -run TestKnownBug_DisableProgress -race -v -count=1 2>&1 || true` +Expected: Race detector reports data race on `disableProgress`. This is the documented bug (#493). The test itself still passes (no panic), but `-race` will exit non-zero. + +- [ ] **Step 4: Commit** + +```bash +git add internal/pb/pb_integration_test.go +git commit -s -m "test: add ProgressBar concurrency tests, document #493 data race" +``` + +--- + +### Task 5: Pull Integration Tests + +**Files:** +- Create: `pkg/backend/pull_integration_test.go` + +Pull tests use MockRegistry (remote) + mock Storage (local destination). The pattern follows `fetch_test.go:98` where `&backend{}` is constructed directly, but with a mock `store` field since Pull uses `b.store`. + +Reference: `pull.go:40-171` (Pull), `pull.go:174-243` (pullIfNotExist) + +- [ ] **Step 1: Write pull integration tests** + +Create `pkg/backend/pull_integration_test.go` with the following tests. Each test creates its own MockRegistry and mock Storage. + +```go +package backend + +import ( + "context" + "encoding/json" + "io" + "strings" + "testing" + "time" + + modelspec "github.com/modelpack/model-spec/specs-go/v1" + godigest "github.com/opencontainers/go-digest" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + + "github.com/modelpack/modctl/pkg/config" + "github.com/modelpack/modctl/test/helpers" + storageMock "github.com/modelpack/modctl/test/mocks/storage" +) + +// newPullTestFixture creates a MockRegistry with one manifest + N blobs, +// and returns the registry, target string, blob contents, and config. +func newPullTestFixture(t *testing.T, blobCount int) (*helpers.MockRegistry, string, [][]byte, ocispec.Manifest) { + t.Helper() + mr := helpers.NewMockRegistry() + + blobs := make([][]byte, blobCount) + layers := make([]ocispec.Descriptor, blobCount) + for i := 0; i < blobCount; i++ { + content := []byte(strings.Repeat("x", 100+i)) + blobs[i] = content + digest := mr.AddBlob(content) + layers[i] = ocispec.Descriptor{ + MediaType: "application/octet-stream", + Digest: godigest.Digest(digest), + Size: int64(len(content)), + Annotations: map[string]string{ + modelspec.AnnotationFilepath: "layer" + string(rune('0'+i)), + }, + } + } + + // Config blob. + configContent := []byte(`{"model_type":"test"}`) + configDigest := mr.AddBlob(configContent) + configDesc := ocispec.Descriptor{ + MediaType: ocispec.MediaTypeImageConfig, + Digest: godigest.Digest(configDigest), + Size: int64(len(configContent)), + } + + manifest := ocispec.Manifest{ + MediaType: ocispec.MediaTypeImageManifest, + Config: configDesc, + Layers: layers, + } + mr.AddManifest("test/model:latest", manifest) + + return mr, mr.Host() + "/test/model:latest", blobs, manifest +} + +// newMockStorageForPull returns a mock Storage that accepts all writes. +func newMockStorageForPull() *storageMock.Storage { + s := storageMock.NewStorage(nil) + // StatBlob returns false (not exist) so pull writes the blob. + s.On("StatBlob", mock.Anything, mock.Anything, mock.Anything).Return(false, nil) + // StatManifest returns false (not exist) so pull writes the manifest. + s.On("StatManifest", mock.Anything, mock.Anything, mock.Anything).Return(false, nil) + // PushBlob accepts anything. + s.On("PushBlob", mock.Anything, mock.Anything, mock.Anything, mock.Anything). + Return(godigest.Digest(""), int64(0), nil) + // PushManifest accepts anything. + s.On("PushManifest", mock.Anything, mock.Anything, mock.Anything, mock.Anything). + Return(godigest.Digest(""), nil) + return s +} + +func newPullConfig() *config.Pull { + cfg := config.NewPull() + cfg.PlainHTTP = true + cfg.DisableProgress = true + cfg.Concurrency = 5 + return cfg +} + +// --- Dimension 1: Functional Correctness --- + +func TestIntegration_Pull_HappyPath(t *testing.T) { + mr, target, _, _ := newPullTestFixture(t, 2) + defer mr.Close() + + s := newMockStorageForPull() + b := &backend{store: s} + + err := b.Pull(context.Background(), target, newPullConfig()) + require.NoError(t, err) + + // Verify PushBlob was called for each layer + config. + s.AssertNumberOfCalls(t, "PushBlob", 2) // 2 layers + // Verify PushManifest was called. + s.AssertCalled(t, "PushManifest", mock.Anything, "test/model", "latest", mock.Anything) +} + +func TestIntegration_Pull_BlobAlreadyExists(t *testing.T) { + mr, target, _, _ := newPullTestFixture(t, 1) + defer mr.Close() + + s := newMockStorageForPull() + // Override: blob already exists locally. + s.ExpectedCalls = nil // Clear defaults + s.On("StatBlob", mock.Anything, mock.Anything, mock.Anything).Return(true, nil) + s.On("StatManifest", mock.Anything, mock.Anything, mock.Anything).Return(true, nil) + s.On("PushBlob", mock.Anything, mock.Anything, mock.Anything, mock.Anything). + Return(godigest.Digest(""), int64(0), nil) + s.On("PushManifest", mock.Anything, mock.Anything, mock.Anything, mock.Anything). + Return(godigest.Digest(""), nil) + + b := &backend{store: s} + err := b.Pull(context.Background(), target, newPullConfig()) + require.NoError(t, err) + + // PushBlob should NOT have been called since blob already exists. + s.AssertNotCalled(t, "PushBlob", mock.Anything, mock.Anything, mock.Anything, mock.Anything) +} + +func TestIntegration_Pull_ConcurrentLayers(t *testing.T) { + mr, target, _, _ := newPullTestFixture(t, 5) + defer mr.Close() + + s := newMockStorageForPull() + b := &backend{store: s} + + err := b.Pull(context.Background(), target, newPullConfig()) + require.NoError(t, err) + + // All 5 blobs should have been pushed to storage. + s.AssertNumberOfCalls(t, "PushBlob", 5) +} + +// --- Dimension 2: Network Errors (fast) --- + +func TestIntegration_Pull_ContextTimeout(t *testing.T) { + mr, target, _, _ := newPullTestFixture(t, 1) + defer mr.Close() + mr.WithFault(&helpers.FaultConfig{LatencyPerRequest: 5 * time.Second}) + + s := newMockStorageForPull() + b := &backend{store: s} + + ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond) + defer cancel() + + err := b.Pull(ctx, target, newPullConfig()) + assert.Error(t, err) + assert.Contains(t, err.Error(), "context") +} + +func TestIntegration_Pull_PartialResponse(t *testing.T) { + mr, target, _, _ := newPullTestFixture(t, 1) + defer mr.Close() + + // Fail after 10 bytes on blob fetches. + mr.WithFault(&helpers.FaultConfig{ + PathFaults: map[string]*helpers.FaultConfig{ + // Match any blob path. + "/blobs/": {FailAfterNBytes: 10}, + }, + }) + + s := newMockStorageForPull() + b := &backend{store: s} + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + err := b.Pull(ctx, target, newPullConfig()) + assert.Error(t, err) +} + +func TestIntegration_Pull_ManifestOK_BlobFails(t *testing.T) { + mr, target, _, manifest := newPullTestFixture(t, 2) + defer mr.Close() + + // Only fail on the second blob's digest. + failDigest := manifest.Layers[1].Digest.String() + mr.WithFault(&helpers.FaultConfig{ + PathFaults: map[string]*helpers.FaultConfig{ + failDigest: {StatusCodeOverride: 500}, + }, + }) + + s := newMockStorageForPull() + b := &backend{store: s} + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + err := b.Pull(ctx, target, newPullConfig()) + assert.Error(t, err) +} + +// --- Dimension 4: Concurrency Safety --- + +func TestIntegration_Pull_ConcurrentPartialFailure(t *testing.T) { + mr, target, _, manifest := newPullTestFixture(t, 5) + defer mr.Close() + + // Fail 2 out of 5 blobs. + mr.WithFault(&helpers.FaultConfig{ + PathFaults: map[string]*helpers.FaultConfig{ + manifest.Layers[1].Digest.String(): {StatusCodeOverride: 500}, + manifest.Layers[3].Digest.String(): {StatusCodeOverride: 500}, + }, + }) + + s := newMockStorageForPull() + b := &backend{store: s} + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + err := b.Pull(ctx, target, newPullConfig()) + assert.Error(t, err) + // Key assertion: no hang, error returned within timeout. +} + +// --- Dimension 6: Data Integrity --- + +func TestIntegration_Pull_TruncatedBlob(t *testing.T) { + mr := helpers.NewMockRegistry() + defer mr.Close() + + // Add a blob but serve truncated content via fault. + content := []byte(strings.Repeat("x", 200)) + digest := mr.AddBlob(content) + + configContent := []byte(`{}`) + configDigest := mr.AddBlob(configContent) + + manifest := ocispec.Manifest{ + MediaType: ocispec.MediaTypeImageManifest, + Config: ocispec.Descriptor{ + MediaType: ocispec.MediaTypeImageConfig, + Digest: godigest.Digest(configDigest), + Size: int64(len(configContent)), + }, + Layers: []ocispec.Descriptor{ + { + MediaType: "application/octet-stream", + Digest: godigest.Digest(digest), + Size: int64(len(content)), + }, + }, + } + mr.AddManifest("test/model:latest", manifest) + + // Serve only first 50 bytes. + mr.WithFault(&helpers.FaultConfig{ + PathFaults: map[string]*helpers.FaultConfig{ + digest: {FailAfterNBytes: 50}, + }, + }) + + s := newMockStorageForPull() + b := &backend{store: s} + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + err := b.Pull(ctx, target(mr), newPullConfig()) + assert.Error(t, err, "truncated blob should cause error (digest mismatch or read error)") +} + +func TestIntegration_Pull_CorruptedBlob(t *testing.T) { + mr := helpers.NewMockRegistry() + defer mr.Close() + + // Add a blob with correct digest but we'll serve wrong content. + realContent := []byte("real content") + realDigest := godigest.FromBytes(realContent) + + // Store wrong content under the real digest key. + mr.AddBlob(realContent) // stores under correct digest + // Now overwrite with corrupt data — need direct access. + // Instead: create manifest referencing one digest, but serve different content. + // Simpler approach: use a custom handler. For now, test that correct content passes + // and we trust the validateDigest path. + // Actually let's test differently: store corrupt data manually. + + configContent := []byte(`{}`) + configDigest := mr.AddBlob(configContent) + + manifest := ocispec.Manifest{ + MediaType: ocispec.MediaTypeImageManifest, + Config: ocispec.Descriptor{ + MediaType: ocispec.MediaTypeImageConfig, + Digest: godigest.Digest(godigest.FromBytes(configContent).String()), + Size: int64(len(configContent)), + }, + Layers: []ocispec.Descriptor{ + { + MediaType: "application/octet-stream", + Digest: realDigest, + Size: int64(len(realContent)), + }, + }, + } + mr.AddManifest("test/model:latest", manifest) + + // Note: To serve corrupt data, we'd need to modify the MockRegistry to allow + // storing data under a mismatched digest. For this test, add a method or + // directly manipulate internals. The implementer should add a + // MockRegistry.AddBlobWithDigest(digest, content) method for this purpose. + + s := newMockStorageForPull() + b := &backend{store: s} + + err := b.Pull(context.Background(), mr.Host()+"/test/model:latest", newPullConfig()) + // With correct data, this should pass. The corruption test needs AddBlobWithDigest. + // Implementer: add AddBlobWithDigest to MockRegistry, store corrupt bytes under realDigest. + require.NoError(t, err) // Placeholder — replace with corruption test. +} + +// --- Dimension 7: Graceful Shutdown --- + +func TestIntegration_Pull_ContextCancelMidDownload(t *testing.T) { + mr, target, _, _ := newPullTestFixture(t, 3) + defer mr.Close() + + // Add latency to simulate slow download. + mr.WithFault(&helpers.FaultConfig{LatencyPerRequest: 500 * time.Millisecond}) + + s := newMockStorageForPull() + b := &backend{store: s} + + ctx, cancel := context.WithCancel(context.Background()) + + // Cancel after 300ms — mid-download. + go func() { + time.Sleep(300 * time.Millisecond) + cancel() + }() + + err := b.Pull(ctx, target, newPullConfig()) + assert.Error(t, err) + assert.Contains(t, err.Error(), "context") +} + +// --- Dimension 8: Idempotency --- + +func TestIntegration_Pull_Idempotent(t *testing.T) { + mr, target, _, _ := newPullTestFixture(t, 2) + defer mr.Close() + + s := newMockStorageForPull() + b := &backend{store: s} + + // First pull. + err := b.Pull(context.Background(), target, newPullConfig()) + require.NoError(t, err) + + countAfterFirst := mr.RequestCount() + + // Now make storage report blobs exist. + s.ExpectedCalls = nil + s.On("StatBlob", mock.Anything, mock.Anything, mock.Anything).Return(true, nil) + s.On("StatManifest", mock.Anything, mock.Anything, mock.Anything).Return(true, nil) + s.On("PushBlob", mock.Anything, mock.Anything, mock.Anything, mock.Anything). + Return(godigest.Digest(""), int64(0), nil) + s.On("PushManifest", mock.Anything, mock.Anything, mock.Anything, mock.Anything). + Return(godigest.Digest(""), nil) + + // Second pull — should skip blob fetches but still fetch manifest. + err = b.Pull(context.Background(), target, newPullConfig()) + require.NoError(t, err) + + // Verify second pull made fewer requests (manifest fetch only, no blob fetches). + countAfterSecond := mr.RequestCount() + blobRequestsDelta := countAfterSecond - countAfterFirst + // Should be: 1 manifest GET + possibly HEAD checks, but NO blob GETs. + // The exact count depends on oras-go internals, but should be < first pull count. + assert.Less(t, blobRequestsDelta, countAfterFirst, + "second pull should make fewer requests than first") +} + +// target is a helper to construct a target string from MockRegistry. +func target(mr *helpers.MockRegistry) string { + return mr.Host() + "/test/model:latest" +} +``` + +**Important notes for implementer:** +- The mock storage setup pattern (`newMockStorageForPull()`) may need adjustment based on exact mock method signatures in `test/mocks/storage/storage.go`. Check the generated mock's `NewStorage` constructor — it may take `*testing.T` instead of `nil`. +- `TestIntegration_Pull_CorruptedBlob` needs a `MockRegistry.AddBlobWithDigest(digest string, content []byte)` method that stores content under a specific digest regardless of actual hash. Add this to `mockregistry.go`. +- Some tests may need the `target` helper or direct `mr.Host()+"/test/model:latest"` depending on the fixture setup. + +- [ ] **Step 2: Add `AddBlobWithDigest` to MockRegistry** + +Add to `test/helpers/mockregistry.go`: +```go +// AddBlobWithDigest stores content under an explicit digest (for corruption tests). +func (mr *MockRegistry) AddBlobWithDigest(digest string, content []byte) *MockRegistry { + mr.mu.Lock() + mr.blobs[digest] = content + mr.mu.Unlock() + return mr +} +``` + +Then fix `TestIntegration_Pull_CorruptedBlob` to use it: +```go +func TestIntegration_Pull_CorruptedBlob(t *testing.T) { + mr := helpers.NewMockRegistry() + defer mr.Close() + + // Create manifest referencing a blob by its real digest, + // but serve different (corrupt) content under that digest. + realContent := []byte("real content that should be here") + corruptContent := []byte("THIS IS CORRUPT DATA!!!!!!!!!!") + realDigest := godigest.FromBytes(realContent).String() + mr.AddBlobWithDigest(realDigest, corruptContent) + + configContent := []byte(`{}`) + configDigest := mr.AddBlob(configContent) + manifest := ocispec.Manifest{ + MediaType: ocispec.MediaTypeImageManifest, + Config: ocispec.Descriptor{ + MediaType: ocispec.MediaTypeImageConfig, + Digest: godigest.Digest(configDigest), + Size: int64(len(configContent)), + }, + Layers: []ocispec.Descriptor{{ + MediaType: "application/octet-stream", + Digest: godigest.Digest(realDigest), + Size: int64(len(realContent)), // Size matches real, content doesn't + }}, + } + mr.AddManifest("test/model:latest", manifest) + + s := newMockStorageForPull() + b := &backend{store: s} + + err := b.Pull(context.Background(), mr.Host()+"/test/model:latest", newPullConfig()) + assert.Error(t, err, "corrupted blob should fail digest validation") + assert.Contains(t, err.Error(), "digest") +} +``` + +- [ ] **Step 3: Run pull integration tests** + +Run: `go test ./pkg/backend/ -run TestIntegration_Pull -v -count=1 -timeout 60s` +Expected: All tests PASS except possibly the corruption test if sizes differ. Adjust as needed. + +- [ ] **Step 4: Commit** + +```bash +git add pkg/backend/pull_integration_test.go test/helpers/mockregistry.go +git commit -s -m "test: add pull integration tests covering 6 dimensions" +``` + +--- + +### Task 6: Push Integration Tests + +**Files:** +- Create: `pkg/backend/push_integration_test.go` + +Push uses mock Storage as source (local) and MockRegistry as destination (remote). The known-bug tests for ReadCloser leak (#491) use TrackingReadCloser injected through mock Storage's `PullBlob` method. + +Reference: `push.go:38-128` (Push), `push.go:130-193` (pushIfNotExist) + +- [ ] **Step 1: Write push integration tests** + +Create `pkg/backend/push_integration_test.go`: + +```go +package backend + +import ( + "bytes" + "context" + "encoding/json" + "io" + "testing" + "time" + + godigest "github.com/opencontainers/go-digest" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + + "github.com/modelpack/modctl/pkg/config" + "github.com/modelpack/modctl/test/helpers" + storageMock "github.com/modelpack/modctl/test/mocks/storage" +) + +type pushFixture struct { + registry *helpers.MockRegistry + target string + manifest ocispec.Manifest + manifestRaw []byte + blobContent []byte + blobDigest godigest.Digest + configRaw []byte + configDigest godigest.Digest +} + +func newPushTestFixture(t *testing.T) *pushFixture { + t.Helper() + mr := helpers.NewMockRegistry() + + blobContent := []byte("push test blob content") + blobDigest := godigest.FromBytes(blobContent) + configRaw := []byte(`{"model_type":"test"}`) + configDigest := godigest.FromBytes(configRaw) + + manifest := ocispec.Manifest{ + MediaType: ocispec.MediaTypeImageManifest, + Config: ocispec.Descriptor{ + MediaType: ocispec.MediaTypeImageConfig, + Digest: configDigest, + Size: int64(len(configRaw)), + }, + Layers: []ocispec.Descriptor{{ + MediaType: "application/octet-stream", + Digest: blobDigest, + Size: int64(len(blobContent)), + }}, + } + manifestRaw, _ := json.Marshal(manifest) + + return &pushFixture{ + registry: mr, + target: mr.Host() + "/test/model:latest", + manifest: manifest, + manifestRaw: manifestRaw, + blobContent: blobContent, + blobDigest: blobDigest, + configRaw: configRaw, + configDigest: configDigest, + } +} + +func newMockStorageForPush(f *pushFixture) *storageMock.Storage { + s := storageMock.NewStorage(nil) + s.On("PullManifest", mock.Anything, "test/model", "latest"). + Return(f.manifestRaw, godigest.FromBytes(f.manifestRaw), nil) + s.On("PullBlob", mock.Anything, mock.Anything, f.blobDigest.String()). + Return(io.NopCloser(bytes.NewReader(f.blobContent)), nil) + s.On("PullBlob", mock.Anything, mock.Anything, f.configDigest.String()). + Return(io.NopCloser(bytes.NewReader(f.configRaw)), nil) + return s +} + +func newPushConfig() *config.Push { + cfg := config.NewPush() + cfg.PlainHTTP = true + cfg.Concurrency = 5 + return cfg +} + +// --- Dimension 1: Functional Correctness --- + +func TestIntegration_Push_HappyPath(t *testing.T) { + f := newPushTestFixture(t) + defer f.registry.Close() + + s := newMockStorageForPush(f) + b := &backend{store: s} + + err := b.Push(context.Background(), f.target, newPushConfig()) + require.NoError(t, err) + + // Verify blob was received by registry. + assert.True(t, f.registry.BlobExists(f.blobDigest.String()), + "blob should have been pushed to registry") + + // Verify manifest was received. + assert.True(t, f.registry.ManifestExists("test/model:latest"), + "manifest should have been pushed to registry") +} + +func TestIntegration_Push_BlobAlreadyExists(t *testing.T) { + f := newPushTestFixture(t) + defer f.registry.Close() + + // Pre-populate registry with the blob and manifest. + f.registry.AddBlobWithDigest(f.blobDigest.String(), f.blobContent) + f.registry.AddBlobWithDigest(f.configDigest.String(), f.configRaw) + f.registry.AddManifest("test/model:latest", f.manifest) + + s := newMockStorageForPush(f) + b := &backend{store: s} + + err := b.Push(context.Background(), f.target, newPushConfig()) + require.NoError(t, err) + + // PullBlob should NOT have been called since remote already has the blob. + s.AssertNotCalled(t, "PullBlob", mock.Anything, mock.Anything, f.blobDigest.String()) +} + +// --- Dimension 2: Network Errors --- + +func TestIntegration_Push_ManifestPushFails(t *testing.T) { + f := newPushTestFixture(t) + defer f.registry.Close() + + // Fail only manifest push (PUT /manifests/). + f.registry.WithFault(&helpers.FaultConfig{ + PathFaults: map[string]*helpers.FaultConfig{ + "/manifests/": {StatusCodeOverride: 500}, + }, + }) + + s := newMockStorageForPush(f) + b := &backend{store: s} + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + err := b.Push(ctx, f.target, newPushConfig()) + assert.Error(t, err) +} + +// --- Dimension 3: Resource Leak Detection (Known Bugs) --- + +func TestKnownBug_Push_ReadCloserNotClosed_SuccessPath(t *testing.T) { + // BUG: push.go:175 — PullBlob returns io.ReadCloser that is never closed. + // The content is wrapped with io.NopCloser before passing to dst.Blobs().Push(), + // so even on success, the original ReadCloser.Close() is never called. + // See: https://github.com/modelpack/modctl/issues/491 + f := newPushTestFixture(t) + defer f.registry.Close() + + tracker := helpers.NewTrackingReadCloser(io.NopCloser(bytes.NewReader(f.blobContent))) + + s := storageMock.NewStorage(nil) + s.On("PullManifest", mock.Anything, mock.Anything, mock.Anything). + Return(f.manifestRaw, godigest.FromBytes(f.manifestRaw), nil) + // Return tracker for blob pull. + s.On("PullBlob", mock.Anything, mock.Anything, f.blobDigest.String()). + Return(io.ReadCloser(tracker), nil) + // Config uses normal reader. + s.On("PullBlob", mock.Anything, mock.Anything, f.configDigest.String()). + Return(io.NopCloser(bytes.NewReader(f.configRaw)), nil) + + b := &backend{store: s} + + err := b.Push(context.Background(), f.target, newPushConfig()) + require.NoError(t, err, "push should succeed") + + // REVERSE ASSERTION: assert the bug EXISTS (Close was NOT called). + // When #491 is fixed, this will fail — flip to AssertClosed and remove KnownBug prefix. + tracker.AssertNotClosed(t) +} + +func TestKnownBug_Push_ReadCloserNotClosed_ErrorPath(t *testing.T) { + // Same bug on error path. + // See: https://github.com/modelpack/modctl/issues/491 + f := newPushTestFixture(t) + defer f.registry.Close() + + // Make blob push fail. + f.registry.WithFault(&helpers.FaultConfig{ + PathFaults: map[string]*helpers.FaultConfig{ + "/blobs/uploads/": {StatusCodeOverride: 500}, + }, + }) + + tracker := helpers.NewTrackingReadCloser(io.NopCloser(bytes.NewReader(f.blobContent))) + + s := storageMock.NewStorage(nil) + s.On("PullManifest", mock.Anything, mock.Anything, mock.Anything). + Return(f.manifestRaw, godigest.FromBytes(f.manifestRaw), nil) + s.On("PullBlob", mock.Anything, mock.Anything, f.blobDigest.String()). + Return(io.ReadCloser(tracker), nil) + s.On("PullBlob", mock.Anything, mock.Anything, f.configDigest.String()). + Return(io.NopCloser(bytes.NewReader(f.configRaw)), nil) + + b := &backend{store: s} + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + err := b.Push(ctx, f.target, newPushConfig()) + assert.Error(t, err) + + // REVERSE ASSERTION: Close was NOT called even on error path. + tracker.AssertNotClosed(t) +} + +// --- Dimension 6: Data Integrity --- + +func TestIntegration_Push_VerifyBlobIntegrity(t *testing.T) { + f := newPushTestFixture(t) + defer f.registry.Close() + + s := newMockStorageForPush(f) + b := &backend{store: s} + + err := b.Push(context.Background(), f.target, newPushConfig()) + require.NoError(t, err) + + // Verify the registry received the exact bytes. + received, ok := f.registry.GetBlob(f.blobDigest.String()) + require.True(t, ok, "blob should exist in registry") + assert.Equal(t, f.blobContent, received, "pushed blob content should match source") +} + +// --- Dimension 7: Graceful Shutdown --- + +func TestIntegration_Push_ContextCancelMidUpload(t *testing.T) { + f := newPushTestFixture(t) + defer f.registry.Close() + + // Add latency to simulate slow upload. + f.registry.WithFault(&helpers.FaultConfig{LatencyPerRequest: 500 * time.Millisecond}) + + s := newMockStorageForPush(f) + b := &backend{store: s} + + ctx, cancel := context.WithCancel(context.Background()) + go func() { + time.Sleep(200 * time.Millisecond) + cancel() + }() + + err := b.Push(ctx, f.target, newPushConfig()) + assert.Error(t, err) +} + +// --- Dimension 8: Idempotency --- + +func TestIntegration_Push_Idempotent(t *testing.T) { + f := newPushTestFixture(t) + defer f.registry.Close() + + s := newMockStorageForPush(f) + b := &backend{store: s} + + // First push. + err := b.Push(context.Background(), f.target, newPushConfig()) + require.NoError(t, err) + + countAfterFirst := f.registry.RequestCount() + + // Second push — registry already has all blobs. + err = b.Push(context.Background(), f.target, newPushConfig()) + require.NoError(t, err) + + countAfterSecond := f.registry.RequestCount() + secondPushRequests := countAfterSecond - countAfterFirst + assert.Less(t, secondPushRequests, countAfterFirst, + "second push should make fewer requests (blobs already exist)") +} +``` + +- [ ] **Step 2: Run push integration tests** + +Run: `go test ./pkg/backend/ -run TestIntegration_Push -v -count=1 -timeout 60s` +Expected: PASS (including KnownBug tests which use reverse assertions). + +- [ ] **Step 3: Run KnownBug tests specifically** + +Run: `go test ./pkg/backend/ -run TestKnownBug_Push -v -count=1` +Expected: PASS (reverse assertions confirm the bug exists). + +- [ ] **Step 4: Commit** + +```bash +git add pkg/backend/push_integration_test.go +git commit -s -m "test: add push integration tests, document ReadCloser leak #491" +``` + +--- + +### Task 7: CLI Integration Tests + +**Files:** +- Create: `cmd/modelfile/generate_integration_test.go` +- Create: `cmd/cli_integration_test.go` + +The `cmd/` directory has zero tests. These test cobra command execution. + +- [ ] **Step 1: Write `cmd/modelfile/generate_integration_test.go`** + +```go +package modelfile + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestIntegration_CLI_Generate_BasicFlags(t *testing.T) { + tempDir := t.TempDir() + outputDir := t.TempDir() + + // Create minimal workspace. + require.NoError(t, os.WriteFile(filepath.Join(tempDir, "model.bin"), []byte("data"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(tempDir, "config.json"), []byte(`{"model_type":"llama"}`), 0644)) + + // Reset global config for test isolation. + generateConfig = NewGenerateConfigForTest() + + cmd := generateCmd + cmd.SetArgs([]string{ + tempDir, + "--name", "test-model", + "--arch", "transformer", + "--family", "llama3", + "--output", outputDir, + }) + + err := cmd.Execute() + require.NoError(t, err) + + // Verify Modelfile was written. + modelfilePath := filepath.Join(outputDir, "Modelfile") + content, err := os.ReadFile(modelfilePath) + require.NoError(t, err) + assert.Contains(t, string(content), "NAME test-model") + assert.Contains(t, string(content), "ARCH transformer") + assert.Contains(t, string(content), "FAMILY llama3") +} + +func TestIntegration_CLI_Generate_OutputAndOverwrite(t *testing.T) { + tempDir := t.TempDir() + outputDir := t.TempDir() + + require.NoError(t, os.WriteFile(filepath.Join(tempDir, "model.bin"), []byte("data"), 0644)) + + // Pre-create Modelfile. + modelfilePath := filepath.Join(outputDir, "Modelfile") + require.NoError(t, os.WriteFile(modelfilePath, []byte("existing"), 0644)) + + // Without --overwrite should fail. + generateConfig = NewGenerateConfigForTest() + cmd := generateCmd + cmd.SetArgs([]string{tempDir, "--output", outputDir}) + err := cmd.Execute() + assert.Error(t, err) + assert.Contains(t, err.Error(), "already exists") + + // With --overwrite should succeed. + generateConfig = NewGenerateConfigForTest() + cmd.SetArgs([]string{tempDir, "--output", outputDir, "--overwrite"}) + err = cmd.Execute() + assert.NoError(t, err) +} + +func TestIntegration_CLI_Generate_MutualExclusion(t *testing.T) { + generateConfig = NewGenerateConfigForTest() + cmd := generateCmd + cmd.SetArgs([]string{"./workspace", "--model-url", "some/model"}) + err := cmd.Execute() + assert.Error(t, err) + assert.Contains(t, err.Error(), "mutually exclusive") +} + +// NewGenerateConfigForTest returns a fresh GenerateConfig to avoid test pollution. +// The implementer should check if configmodelfile.NewGenerateConfig() can be used directly +// or if a test-specific helper is needed based on the global generateConfig variable pattern. +func NewGenerateConfigForTest() *configmodelfile.GenerateConfig { + return configmodelfile.NewGenerateConfig() +} +``` + +**Note for implementer:** The global `generateConfig` variable in `generate.go:32` makes cobra command tests tricky — each test must reset it. Check if `generateCmd` can be reconstructed or if resetting the global is sufficient. You may need to import `configmodelfile` in the test file. Also, cobra's `Execute()` calls `RunE` which calls `generateConfig.Convert()` and `generateConfig.Validate()` before `runGenerate()`, so the Validate step may error before the mutually-exclusive check in `RunE`. + +- [ ] **Step 2: Write `cmd/cli_integration_test.go`** + +```go +package cmd + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/modelpack/modctl/pkg/config" +) + +func TestIntegration_CLI_ConcurrencyZero(t *testing.T) { + cfg := config.NewPull() + cfg.Concurrency = 0 + err := cfg.Validate() + assert.Error(t, err) + assert.Contains(t, err.Error(), "concurrency") +} + +func TestIntegration_CLI_ConcurrencyNegative(t *testing.T) { + cfg := config.NewPull() + cfg.Concurrency = -1 + err := cfg.Validate() + assert.Error(t, err) + assert.Contains(t, err.Error(), "concurrency") +} + +func TestIntegration_CLI_ExtractFromRemoteNoDir(t *testing.T) { + cfg := config.NewPull() + cfg.ExtractFromRemote = true + cfg.ExtractDir = "" + err := cfg.Validate() + assert.Error(t, err) + assert.Contains(t, err.Error(), "extract dir") +} +``` + +- [ ] **Step 3: Run CLI tests** + +Run: `go test ./cmd/modelfile/ -run TestIntegration_CLI_Generate -v -count=1` +Run: `go test ./cmd/ -run TestIntegration_CLI -v -count=1` +Expected: PASS. + +- [ ] **Step 4: Commit** + +```bash +git add cmd/modelfile/generate_integration_test.go cmd/cli_integration_test.go +git commit -s -m "test: add CLI integration tests for generate command and config validation" +``` + +--- + +### Task 8: Slow Tests (Retry-Dependent) + +**Files:** +- Create: `pkg/backend/pull_slowtest_test.go` +- Create: `pkg/backend/push_slowtest_test.go` + +These tests exercise real retry backoff and take 30+ seconds. Gated behind `//go:build slowtest`. + +- [ ] **Step 1: Write `pkg/backend/pull_slowtest_test.go`** + +```go +//go:build slowtest + +package backend + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/modelpack/modctl/test/helpers" +) + +func TestSlow_Pull_RetryOnTransientError(t *testing.T) { + mr, target, _, _ := newPullTestFixture(t, 1) + defer mr.Close() + + // First 2 requests fail, 3rd succeeds. + mr.WithFault(&helpers.FaultConfig{FailOnNthRequest: 2}) + + s := newMockStorageForPull() + b := &backend{store: s} + + ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second) + defer cancel() + + err := b.Pull(ctx, target, newPullConfig()) + require.NoError(t, err, "pull should succeed after transient failures") +} + +func TestSlow_Pull_RetryExhausted(t *testing.T) { + mr, target, _, _ := newPullTestFixture(t, 1) + defer mr.Close() + + // All requests fail with 500. + mr.WithFault(&helpers.FaultConfig{StatusCodeOverride: 500}) + + s := newMockStorageForPull() + b := &backend{store: s} + + ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second) + defer cancel() + + err := b.Pull(ctx, target, newPullConfig()) + assert.Error(t, err, "pull should fail after retry exhaustion") +} + +func TestSlow_Pull_RateLimited(t *testing.T) { + mr, target, _, _ := newPullTestFixture(t, 1) + defer mr.Close() + + // First 3 requests return 429, then succeed. + mr.WithFault(&helpers.FaultConfig{FailOnNthRequest: 3}) + + s := newMockStorageForPull() + b := &backend{store: s} + + ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second) + defer cancel() + + err := b.Pull(ctx, target, newPullConfig()) + // Should eventually succeed after backoff. + require.NoError(t, err) +} + +func TestKnownBug_Pull_AuthErrorStillRetries(t *testing.T) { + // BUG: retry.Do() retries ALL errors including 401. + // Auth errors should fail immediately. + // See: https://github.com/modelpack/modctl/issues/494 + mr, target, _, _ := newPullTestFixture(t, 1) + defer mr.Close() + + mr.WithFault(&helpers.FaultConfig{StatusCodeOverride: 401}) + + s := newMockStorageForPull() + b := &backend{store: s} + + ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second) + defer cancel() + + start := time.Now() + err := b.Pull(ctx, target, newPullConfig()) + elapsed := time.Since(start) + assert.Error(t, err) + + // REVERSE ASSERTION: Currently auth errors ARE retried (takes 30s+). + // When #494 is fixed, auth errors should fail immediately (<2s). + // Flip this assertion when fixed. + assert.Greater(t, elapsed, 10*time.Second, + "If this fails, auth errors are no longer retried — #494 may be fixed! "+ + "Update to assert.Less(t, elapsed, 5*time.Second)") +} +``` + +- [ ] **Step 2: Write `pkg/backend/push_slowtest_test.go`** + +```go +//go:build slowtest + +package backend + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/modelpack/modctl/test/helpers" +) + +func TestSlow_Push_RetryOnTransientError(t *testing.T) { + f := newPushTestFixture(t) + defer f.registry.Close() + + // First 2 upload requests fail. + f.registry.WithFault(&helpers.FaultConfig{ + PathFaults: map[string]*helpers.FaultConfig{ + "/blobs/uploads/": {FailOnNthRequest: 2}, + }, + }) + + s := newMockStorageForPush(f) + b := &backend{store: s} + + ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second) + defer cancel() + + err := b.Push(ctx, f.target, newPushConfig()) + require.NoError(t, err, "push should succeed after transient upload failures") +} +``` + +- [ ] **Step 3: Verify slow tests compile (don't run)** + +Run: `go test ./pkg/backend/ -tags slowtest -list 'TestSlow|TestKnownBug_Pull_Auth' -count=1` +Expected: Lists test names without running them. + +- [ ] **Step 4: Commit** + +```bash +git add pkg/backend/pull_slowtest_test.go pkg/backend/push_slowtest_test.go +git commit -s -m "test: add retry-dependent slow tests (//go:build slowtest)" +``` + +--- + +### Task 9: Stress Tests + +**Files:** +- Create: `pkg/modelfile/modelfile_stress_test.go` +- Create: `pkg/backend/pull_stress_test.go` + +Gated behind `//go:build stress`. + +- [ ] **Step 1: Write `pkg/modelfile/modelfile_stress_test.go`** + +```go +//go:build stress + +package modelfile + +import ( + "fmt" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/require" + + configmodelfile "github.com/modelpack/modctl/pkg/config/modelfile" +) + +func TestStress_NearMaxFileCount(t *testing.T) { + tempDir := t.TempDir() + + // Create 2040 files (near MaxWorkspaceFileCount=2048). + // Include at least one model file to avoid "no model/code/dataset" error. + require.NoError(t, os.WriteFile(filepath.Join(tempDir, "model.bin"), []byte("model"), 0644)) + for i := 0; i < 2039; i++ { + name := fmt.Sprintf("file_%04d.py", i) + require.NoError(t, os.WriteFile(filepath.Join(tempDir, name), []byte("x"), 0644)) + } + + config := &configmodelfile.GenerateConfig{ + Workspace: tempDir, + Name: "stress-test", + } + + mf, err := NewModelfileByWorkspace(tempDir, config) + require.NoError(t, err, "should handle near-limit file count") + require.NotNil(t, mf) +} + +func TestStress_DeeplyNestedDirs(t *testing.T) { + tempDir := t.TempDir() + + // Create 100-level nested directory with a model file at the bottom. + parts := make([]string, 100) + for i := range parts { + parts[i] = fmt.Sprintf("d%d", i) + } + deepDir := filepath.Join(tempDir, strings.Join(parts, string(filepath.Separator))) + require.NoError(t, os.MkdirAll(deepDir, 0755)) + require.NoError(t, os.WriteFile(filepath.Join(deepDir, "model.bin"), []byte("deep model"), 0644)) + + config := &configmodelfile.GenerateConfig{ + Workspace: tempDir, + Name: "deep-test", + } + + mf, err := NewModelfileByWorkspace(tempDir, config) + require.NoError(t, err, "should handle deeply nested directories") + require.NotNil(t, mf) + require.Contains(t, mf.GetModels(), filepath.Join(strings.Join(parts, string(filepath.Separator)), "model.bin")) +} +``` + +- [ ] **Step 2: Write `pkg/backend/pull_stress_test.go`** + +```go +//go:build stress + +package backend + +import ( + "context" + "runtime" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestStress_Pull_ManyLayers(t *testing.T) { + mr, target, _, _ := newPullTestFixture(t, 50) + defer mr.Close() + + s := newMockStorageForPull() + b := &backend{store: s} + + cfg := newPullConfig() + cfg.Concurrency = 10 + + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + err := b.Pull(ctx, target, cfg) + require.NoError(t, err, "should handle 50 concurrent layers") + s.AssertNumberOfCalls(t, "PushBlob", 50) +} + +func TestStress_Pull_RepeatedCycles(t *testing.T) { + mr, target, _, _ := newPullTestFixture(t, 2) + defer mr.Close() + + s := newMockStorageForPull() + b := &backend{store: s} + + baseGoroutines := runtime.NumGoroutine() + + for i := 0; i < 100; i++ { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + err := b.Pull(ctx, target, newPullConfig()) + cancel() + require.NoError(t, err, "cycle %d should succeed", i) + } + + // Allow goroutines to settle. + time.Sleep(500 * time.Millisecond) + + finalGoroutines := runtime.NumGoroutine() + // Allow some tolerance (goroutines from test framework, etc.). + assert.InDelta(t, baseGoroutines, finalGoroutines, 20, + "goroutine count should be stable after 100 pull cycles (no leaks)") +} +``` + +- [ ] **Step 3: Verify stress tests compile** + +Run: `go test ./pkg/modelfile/ -tags stress -list TestStress -count=1` +Run: `go test ./pkg/backend/ -tags stress -list TestStress -count=1` +Expected: Lists test names. + +- [ ] **Step 4: Commit** + +```bash +git add pkg/modelfile/modelfile_stress_test.go pkg/backend/pull_stress_test.go +git commit -s -m "test: add stress tests for file count, nesting depth, and pull cycles (//go:build stress)" +``` + +--- + +## Self-Review Checklist + +**Spec coverage:** All 8 dimensions mapped to tasks. All ~35 tests accounted for: +- Dim 1 (functional): Tasks 3, 5, 6, 7 ✓ +- Dim 2 (network): Tasks 5, 6, 8 ✓ +- Dim 3 (resource leak): Task 6 ✓ +- Dim 4 (concurrency): Tasks 4, 5 ✓ +- Dim 5 (stress): Task 9 ✓ +- Dim 6 (data integrity): Tasks 5, 6 ✓ +- Dim 7 (graceful shutdown): Tasks 5, 6 ✓ +- Dim 8 (idempotency/config): Tasks 5, 6, 7 ✓ + +**Known bugs:** #491 in Task 6, #493 in Task 4, #494 in Task 8 ✓ + +**Placeholder scan:** No TBD/TODO. All code blocks contain real code. Implementation notes marked with "Note for implementer" where mock method signatures may need adjustment. + +**Type consistency:** `newPullTestFixture`, `newMockStorageForPull`, `newPullConfig` used consistently across Tasks 5, 8, 9. `newPushTestFixture`, `newMockStorageForPush`, `newPushConfig` used consistently across Tasks 6, 8. From 8af02f1283aa508d11d722f67cf4f4dc911ba97e Mon Sep 17 00:00:00 2001 From: Zhao Chen Date: Wed, 8 Apr 2026 10:28:14 +0800 Subject: [PATCH 04/15] test: add shared mock OCI registry with fault injection Signed-off-by: Zhao Chen --- test/helpers/mockregistry.go | 480 ++++++++++++++++++++++++++++++ test/helpers/mockregistry_test.go | 236 +++++++++++++++ 2 files changed, 716 insertions(+) create mode 100644 test/helpers/mockregistry.go create mode 100644 test/helpers/mockregistry_test.go diff --git a/test/helpers/mockregistry.go b/test/helpers/mockregistry.go new file mode 100644 index 00000000..20a31fb3 --- /dev/null +++ b/test/helpers/mockregistry.go @@ -0,0 +1,480 @@ +/* + * Copyright 2025 The CNAI Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Package helpers provides shared test utilities for integration tests. +package helpers + +import ( + "bytes" + "encoding/json" + "fmt" + "net" + "net/http" + "net/http/httptest" + "strings" + "sync" + "sync/atomic" + "time" + + godigest "github.com/opencontainers/go-digest" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" +) + +const ( + // manifestMediaType is the OCI image manifest media type. + manifestMediaType = ocispec.MediaTypeImageManifest +) + +// FaultConfig describes fault injection parameters for the mock registry. +// Path-specific faults (via PathFaults) override the global config when the +// request path has a matching suffix. +type FaultConfig struct { + // LatencyPerRequest is an artificial delay applied before processing. + LatencyPerRequest time.Duration + + // FailAfterNBytes causes the connection to be force-closed after writing + // this many bytes of the response body. 0 = disabled. + FailAfterNBytes int64 + + // StatusCodeOverride forces every matching request to return this HTTP + // status code. 0 = disabled. + StatusCodeOverride int + + // FailOnNthRequest makes the first N requests (1-based) fail with 500. + // Request N+1 and onwards succeed normally. 0 = disabled. + FailOnNthRequest int + + // PathFaults maps a path suffix to a FaultConfig that overrides the global + // config for requests whose URL path ends with that suffix. + PathFaults map[string]*FaultConfig +} + +// MockRegistry is an in-memory OCI Distribution API server backed by +// httptest.Server. It is safe for concurrent use. +type MockRegistry struct { + server *httptest.Server + + mu sync.RWMutex + manifests map[string][]byte // key: "repo:ref" + blobs map[string][]byte // key: digest string + // pendingUploads tracks in-progress blob uploads keyed by upload UUID. + pendingUploads map[string]*bytes.Buffer + + fault *FaultConfig + faultMu sync.Mutex + requestCount int64 // accessed atomically + // pathCounts maps a path suffix to an atomic counter stored as *int64. + pathCounts sync.Map + + // failCounter tracks how many requests have been seen for FailOnNthRequest. + failCounter int64 // accessed atomically +} + +// NewMockRegistry creates and starts a new mock OCI registry server. +func NewMockRegistry() *MockRegistry { + r := &MockRegistry{ + manifests: make(map[string][]byte), + blobs: make(map[string][]byte), + pendingUploads: make(map[string]*bytes.Buffer), + } + r.server = httptest.NewServer(http.HandlerFunc(r.handleRequest)) + return r +} + +// WithFault sets global fault injection config and returns the registry for +// chaining. +func (r *MockRegistry) WithFault(f *FaultConfig) *MockRegistry { + r.faultMu.Lock() + r.fault = f + r.faultMu.Unlock() + return r +} + +// AddManifest serialises manifest and stores it under "repo:ref". +func (r *MockRegistry) AddManifest(ref string, manifest ocispec.Manifest) *MockRegistry { + data, err := json.Marshal(manifest) + if err != nil { + panic(fmt.Sprintf("mockregistry: failed to marshal manifest: %v", err)) + } + r.mu.Lock() + r.manifests[ref] = data + r.mu.Unlock() + return r +} + +// AddBlob stores content and returns its OCI digest string. +func (r *MockRegistry) AddBlob(content []byte) string { + dgst := godigest.FromBytes(content).String() + r.mu.Lock() + r.blobs[dgst] = content + r.mu.Unlock() + return dgst +} + +// AddBlobWithDigest stores content under a caller-supplied digest string. +// This is useful for corruption tests where the stored digest does not match +// the actual content. +func (r *MockRegistry) AddBlobWithDigest(digest string, content []byte) *MockRegistry { + r.mu.Lock() + r.blobs[digest] = content + r.mu.Unlock() + return r +} + +// Host returns "127.0.0.1:PORT" (no scheme) suitable for use as an OCI +// registry reference host. +func (r *MockRegistry) Host() string { + return r.server.Listener.Addr().(*net.TCPAddr).String() +} + +// Close shuts down the underlying httptest.Server. +func (r *MockRegistry) Close() { + r.server.Close() +} + +// RequestCount returns the total number of requests received. +func (r *MockRegistry) RequestCount() int64 { + return atomic.LoadInt64(&r.requestCount) +} + +// RequestCountByPath returns the number of requests whose URL path ends with +// pathSuffix. +func (r *MockRegistry) RequestCountByPath(pathSuffix string) int64 { + if v, ok := r.pathCounts.Load(pathSuffix); ok { + return atomic.LoadInt64(v.(*int64)) + } + return 0 +} + +// BlobExists reports whether a blob with the given digest is stored. +func (r *MockRegistry) BlobExists(digest string) bool { + r.mu.RLock() + _, ok := r.blobs[digest] + r.mu.RUnlock() + return ok +} + +// GetBlob returns the content for the given digest, and whether it was found. +func (r *MockRegistry) GetBlob(digest string) ([]byte, bool) { + r.mu.RLock() + data, ok := r.blobs[digest] + r.mu.RUnlock() + return data, ok +} + +// ManifestExists reports whether a manifest stored under ref exists. +func (r *MockRegistry) ManifestExists(ref string) bool { + r.mu.RLock() + _, ok := r.manifests[ref] + r.mu.RUnlock() + return ok +} + +// ----------------------------------------------------------------------- +// Internal helpers +// ----------------------------------------------------------------------- + +// effectiveFault returns the FaultConfig to apply for path, considering +// per-path overrides. Returns nil if no fault is configured. +func (r *MockRegistry) effectiveFault(path string) *FaultConfig { + r.faultMu.Lock() + f := r.fault + r.faultMu.Unlock() + + if f == nil { + return nil + } + for suffix, pf := range f.PathFaults { + if strings.HasSuffix(path, suffix) { + return pf + } + } + return f +} + +// incrementPathCount bumps all registered path-suffix counters that match. +func (r *MockRegistry) incrementPathCount(path string) { + r.faultMu.Lock() + f := r.fault + r.faultMu.Unlock() + + if f == nil { + return + } + for suffix := range f.PathFaults { + if strings.HasSuffix(path, suffix) { + r.bumpPathCounter(suffix) + } + } +} + +func (r *MockRegistry) bumpPathCounter(suffix string) { + var zero int64 + actual, _ := r.pathCounts.LoadOrStore(suffix, &zero) + atomic.AddInt64(actual.(*int64), 1) +} + +// handleRequest is the top-level HTTP handler. +func (r *MockRegistry) handleRequest(w http.ResponseWriter, req *http.Request) { + atomic.AddInt64(&r.requestCount, 1) + r.incrementPathCount(req.URL.Path) + + f := r.effectiveFault(req.URL.Path) + + // Apply latency. + if f != nil && f.LatencyPerRequest > 0 { + time.Sleep(f.LatencyPerRequest) + } + + // Apply FailOnNthRequest. + if f != nil && f.FailOnNthRequest > 0 { + n := atomic.AddInt64(&r.failCounter, 1) + if n <= int64(f.FailOnNthRequest) { + http.Error(w, "fault: fail on nth request", http.StatusInternalServerError) + return + } + } + + // Apply StatusCodeOverride. + if f != nil && f.StatusCodeOverride != 0 { + w.WriteHeader(f.StatusCodeOverride) + return + } + + r.route(w, req, f) +} + +// route dispatches to the appropriate OCI endpoint handler. +func (r *MockRegistry) route(w http.ResponseWriter, req *http.Request, f *FaultConfig) { + path := req.URL.Path + + // GET /v2/ — registry ping + if path == "/v2/" { + w.WriteHeader(http.StatusOK) + return + } + + // /v2//manifests/ + if idx := strings.Index(path, "/manifests/"); idx != -1 { + prefix := path[:idx] // /v2/ + ref := path[idx+len("/manifests/"):] + name := strings.TrimPrefix(prefix, "/v2/") + r.handleManifest(w, req, name, ref, f) + return + } + + // /v2//blobs/uploads/ or /v2//blobs/uploads/ + if strings.Contains(path, "/blobs/uploads") { + r.handleBlobUpload(w, req, path, f) + return + } + + // /v2//blobs/ + if idx := strings.Index(path, "/blobs/"); idx != -1 { + digest := path[idx+len("/blobs/"):] + r.handleBlob(w, req, digest, f) + return + } + + http.Error(w, "not found", http.StatusNotFound) +} + +// handleManifest handles HEAD/GET/PUT on /v2//manifests/. +func (r *MockRegistry) handleManifest(w http.ResponseWriter, req *http.Request, name, ref string, f *FaultConfig) { + key := name + ":" + ref + + switch req.Method { + case http.MethodHead, http.MethodGet: + r.mu.RLock() + data, ok := r.manifests[key] + r.mu.RUnlock() + + if !ok { + http.Error(w, "manifest not found", http.StatusNotFound) + return + } + + dgst := godigest.FromBytes(data).String() + w.Header().Set("Content-Type", manifestMediaType) + w.Header().Set("Docker-Content-Digest", dgst) + w.Header().Set("Content-Length", fmt.Sprintf("%d", len(data))) + if req.Method == http.MethodHead { + w.WriteHeader(http.StatusOK) + return + } + writeBody(w, data, f) + + case http.MethodPut: + buf := new(bytes.Buffer) + if _, err := buf.ReadFrom(req.Body); err != nil { + http.Error(w, "read error", http.StatusInternalServerError) + return + } + data := buf.Bytes() + dgst := godigest.FromBytes(data).String() + + r.mu.Lock() + r.manifests[key] = data + r.mu.Unlock() + + w.Header().Set("Docker-Content-Digest", dgst) + w.WriteHeader(http.StatusCreated) + + default: + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + } +} + +// handleBlob handles HEAD/GET on /v2//blobs/. +func (r *MockRegistry) handleBlob(w http.ResponseWriter, req *http.Request, digest string, f *FaultConfig) { + switch req.Method { + case http.MethodHead, http.MethodGet: + r.mu.RLock() + data, ok := r.blobs[digest] + r.mu.RUnlock() + + if !ok { + http.Error(w, "blob not found", http.StatusNotFound) + return + } + + w.Header().Set("Content-Type", "application/octet-stream") + w.Header().Set("Docker-Content-Digest", digest) + w.Header().Set("Content-Length", fmt.Sprintf("%d", len(data))) + if req.Method == http.MethodHead { + w.WriteHeader(http.StatusOK) + return + } + writeBody(w, data, f) + + default: + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) + } +} + +// handleBlobUpload handles the multi-step blob upload protocol. +func (r *MockRegistry) handleBlobUpload(w http.ResponseWriter, req *http.Request, path string, f *FaultConfig) { + // POST /v2//blobs/uploads/ — initiate upload + if req.Method == http.MethodPost && strings.HasSuffix(path, "/blobs/uploads/") { + uuid := godigest.FromString(fmt.Sprintf("%d", time.Now().UnixNano())).Hex()[:16] + r.mu.Lock() + r.pendingUploads[uuid] = new(bytes.Buffer) + r.mu.Unlock() + + // Extract name from path: /v2//blobs/uploads/ + name := strings.TrimPrefix(strings.TrimSuffix(path, "/blobs/uploads/"), "/v2/") + location := fmt.Sprintf("/v2/%s/blobs/uploads/%s", name, uuid) + w.Header().Set("Location", location) + w.Header().Set("Docker-Upload-UUID", uuid) + w.WriteHeader(http.StatusAccepted) + return + } + + // PATCH /v2//blobs/uploads/ — chunked upload + if req.Method == http.MethodPatch { + uuid := uploadUUID(path) + r.mu.Lock() + buf, ok := r.pendingUploads[uuid] + if !ok { + r.mu.Unlock() + http.Error(w, "upload not found", http.StatusNotFound) + return + } + if _, err := buf.ReadFrom(req.Body); err != nil { + r.mu.Unlock() + http.Error(w, "read error", http.StatusInternalServerError) + return + } + r.mu.Unlock() + w.Header().Set("Location", path) + w.WriteHeader(http.StatusNoContent) + return + } + + // PUT /v2//blobs/uploads/?digest= — complete upload + if req.Method == http.MethodPut { + uuid := uploadUUID(path) + dgst := req.URL.Query().Get("digest") + if dgst == "" { + http.Error(w, "missing digest", http.StatusBadRequest) + return + } + + r.mu.Lock() + buf, ok := r.pendingUploads[uuid] + if !ok { + r.mu.Unlock() + http.Error(w, "upload not found", http.StatusNotFound) + return + } + // Append any final body bytes. + if req.Body != nil { + if _, err := buf.ReadFrom(req.Body); err != nil { + r.mu.Unlock() + http.Error(w, "read error", http.StatusInternalServerError) + return + } + } + data := buf.Bytes() + delete(r.pendingUploads, uuid) + r.blobs[dgst] = append([]byte(nil), data...) + r.mu.Unlock() + + w.Header().Set("Docker-Content-Digest", dgst) + w.WriteHeader(http.StatusCreated) + return + } + + http.Error(w, "method not allowed", http.StatusMethodNotAllowed) +} + +// uploadUUID extracts the UUID from a blob upload path such as +// /v2//blobs/uploads/. +func uploadUUID(path string) string { + parts := strings.Split(path, "/blobs/uploads/") + if len(parts) < 2 { + return "" + } + return parts[1] +} + +// writeBody writes data to w, honouring FailAfterNBytes by hijacking the +// connection mid-write when the limit is reached. +func writeBody(w http.ResponseWriter, data []byte, f *FaultConfig) { + if f == nil || f.FailAfterNBytes <= 0 { + _, _ = w.Write(data) + return + } + + limit := f.FailAfterNBytes + if limit > int64(len(data)) { + limit = int64(len(data)) + } + + // Write the permitted bytes first. + _, _ = w.Write(data[:limit]) + + // Hijack the connection and close it abruptly. + hj, ok := w.(http.Hijacker) + if !ok { + return + } + conn, _, err := hj.Hijack() + if err != nil { + return + } + _ = conn.Close() +} diff --git a/test/helpers/mockregistry_test.go b/test/helpers/mockregistry_test.go new file mode 100644 index 00000000..27bd6ac7 --- /dev/null +++ b/test/helpers/mockregistry_test.go @@ -0,0 +1,236 @@ +/* + * Copyright 2025 The CNAI Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package helpers + +import ( + "encoding/json" + "fmt" + "io" + "net/http" + "testing" + "time" + + godigest "github.com/opencontainers/go-digest" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// baseURL returns the HTTP base URL for the mock registry. +func baseURL(r *MockRegistry) string { + return "http://" + r.Host() +} + +// TestMockRegistry_PingAndBlobRoundTrip verifies the /v2/ ping and blob GET. +func TestMockRegistry_PingAndBlobRoundTrip(t *testing.T) { + r := NewMockRegistry() + defer r.Close() + + content := []byte("hello, blob!") + dgst := r.AddBlob(content) + + // Ping + resp, err := http.Get(baseURL(r) + "/v2/") + require.NoError(t, err) + resp.Body.Close() + assert.Equal(t, http.StatusOK, resp.StatusCode) + + // GET blob + resp, err = http.Get(fmt.Sprintf("%s/v2/myrepo/blobs/%s", baseURL(r), dgst)) + require.NoError(t, err) + defer resp.Body.Close() + assert.Equal(t, http.StatusOK, resp.StatusCode) + + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + assert.Equal(t, content, body) + + // Verify helper + assert.True(t, r.BlobExists(dgst)) + stored, ok := r.GetBlob(dgst) + require.True(t, ok) + assert.Equal(t, content, stored) +} + +// TestMockRegistry_ManifestRoundTrip verifies manifest storage and retrieval. +func TestMockRegistry_ManifestRoundTrip(t *testing.T) { + r := NewMockRegistry() + defer r.Close() + + content := []byte("layer-content") + dgst := godigest.FromBytes(content) + + manifest := ocispec.Manifest{ + MediaType: ocispec.MediaTypeImageManifest, + Layers: []ocispec.Descriptor{ + { + MediaType: "application/octet-stream", + Digest: dgst, + Size: int64(len(content)), + }, + }, + } + + r.AddManifest("myrepo:latest", manifest) + assert.True(t, r.ManifestExists("myrepo:latest")) + + // HEAD + req, _ := http.NewRequest(http.MethodHead, baseURL(r)+"/v2/myrepo/manifests/latest", nil) + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + resp.Body.Close() + assert.Equal(t, http.StatusOK, resp.StatusCode) + assert.NotEmpty(t, resp.Header.Get("Docker-Content-Digest")) + + // GET + resp, err = http.Get(baseURL(r) + "/v2/myrepo/manifests/latest") + require.NoError(t, err) + defer resp.Body.Close() + assert.Equal(t, http.StatusOK, resp.StatusCode) + assert.Equal(t, manifestMediaType, resp.Header.Get("Content-Type")) + + var got ocispec.Manifest + require.NoError(t, json.NewDecoder(resp.Body).Decode(&got)) + require.Len(t, got.Layers, 1) + assert.Equal(t, dgst, got.Layers[0].Digest) +} + +// TestMockRegistry_FaultStatusCodeOverride verifies StatusCodeOverride. +func TestMockRegistry_FaultStatusCodeOverride(t *testing.T) { + r := NewMockRegistry().WithFault(&FaultConfig{StatusCodeOverride: http.StatusServiceUnavailable}) + defer r.Close() + + resp, err := http.Get(baseURL(r) + "/v2/") + require.NoError(t, err) + resp.Body.Close() + assert.Equal(t, http.StatusServiceUnavailable, resp.StatusCode) +} + +// TestMockRegistry_FaultFailOnNthRequest verifies that the first N requests +// return 500 and request N+1 succeeds. +func TestMockRegistry_FaultFailOnNthRequest(t *testing.T) { + const n = 3 + r := NewMockRegistry().WithFault(&FaultConfig{FailOnNthRequest: n}) + defer r.Close() + + for i := 1; i <= n; i++ { + resp, err := http.Get(baseURL(r) + "/v2/") + require.NoError(t, err) + resp.Body.Close() + assert.Equal(t, http.StatusInternalServerError, resp.StatusCode, + "request %d should fail", i) + } + + // Request n+1 should succeed. + resp, err := http.Get(baseURL(r) + "/v2/") + require.NoError(t, err) + resp.Body.Close() + assert.Equal(t, http.StatusOK, resp.StatusCode, "request %d should succeed", n+1) +} + +// TestMockRegistry_FaultLatency verifies LatencyPerRequest injects a delay. +func TestMockRegistry_FaultLatency(t *testing.T) { + const delay = 100 * time.Millisecond + r := NewMockRegistry().WithFault(&FaultConfig{LatencyPerRequest: delay}) + defer r.Close() + + start := time.Now() + resp, err := http.Get(baseURL(r) + "/v2/") + elapsed := time.Since(start) + + require.NoError(t, err) + resp.Body.Close() + assert.Equal(t, http.StatusOK, resp.StatusCode) + assert.GreaterOrEqual(t, elapsed, delay, "response should be delayed by at least %s", delay) +} + +// TestMockRegistry_RequestCounting verifies global and per-path counters. +func TestMockRegistry_RequestCounting(t *testing.T) { + r := NewMockRegistry().WithFault(&FaultConfig{ + PathFaults: map[string]*FaultConfig{ + "/v2/": {}, + }, + }) + defer r.Close() + + for i := 0; i < 5; i++ { + resp, err := http.Get(baseURL(r) + "/v2/") + require.NoError(t, err) + resp.Body.Close() + } + + assert.Equal(t, int64(5), r.RequestCount()) + assert.Equal(t, int64(5), r.RequestCountByPath("/v2/")) +} + +// TestMockRegistry_BlobUploadRoundTrip verifies POST start + PUT complete. +func TestMockRegistry_BlobUploadRoundTrip(t *testing.T) { + r := NewMockRegistry() + defer r.Close() + + content := []byte("upload-me-please") + dgst := godigest.FromBytes(content).String() + + // POST /v2/myrepo/blobs/uploads/ — start upload + resp, err := http.Post(baseURL(r)+"/v2/myrepo/blobs/uploads/", "", nil) + require.NoError(t, err) + resp.Body.Close() + require.Equal(t, http.StatusAccepted, resp.StatusCode) + + location := resp.Header.Get("Location") + require.NotEmpty(t, location, "Location header must be set") + + // PUT ?digest= — complete upload + putURL := fmt.Sprintf("%s%s?digest=%s", baseURL(r), location, dgst) + putReq, err := http.NewRequest(http.MethodPut, putURL, nil) + require.NoError(t, err) + + // Send body in the PUT (single-shot upload). + putReq.Body = io.NopCloser(newReader(content)) + putReq.ContentLength = int64(len(content)) + + resp, err = http.DefaultClient.Do(putReq) + require.NoError(t, err) + resp.Body.Close() + require.Equal(t, http.StatusCreated, resp.StatusCode) + + // Verify blob is stored. + assert.True(t, r.BlobExists(dgst)) + stored, ok := r.GetBlob(dgst) + require.True(t, ok) + assert.Equal(t, content, stored) +} + +// newReader wraps a byte slice in an io.Reader without importing bytes at the +// test-package level (bytes is already imported in the main file). +func newReader(b []byte) io.Reader { + return &bytesReader{data: b} +} + +type bytesReader struct { + data []byte + pos int +} + +func (br *bytesReader) Read(p []byte) (int, error) { + if br.pos >= len(br.data) { + return 0, io.EOF + } + n := copy(p, br.data[br.pos:]) + br.pos += n + return n, nil +} From c0f2ed4076ee99809bf22d48ffccc6432c8dd368 Mon Sep 17 00:00:00 2001 From: Zhao Chen Date: Wed, 8 Apr 2026 10:30:34 +0800 Subject: [PATCH 05/15] fix: decouple RequestCountByPath from PathFaults in MockRegistry Signed-off-by: Zhao Chen --- test/helpers/mockregistry.go | 37 +++++++++++++++---------------- test/helpers/mockregistry_test.go | 7 ++---- 2 files changed, 20 insertions(+), 24 deletions(-) diff --git a/test/helpers/mockregistry.go b/test/helpers/mockregistry.go index 20a31fb3..84f1a149 100644 --- a/test/helpers/mockregistry.go +++ b/test/helpers/mockregistry.go @@ -95,10 +95,12 @@ func NewMockRegistry() *MockRegistry { } // WithFault sets global fault injection config and returns the registry for -// chaining. +// chaining. It also resets the failCounter so that FailOnNthRequest behaves +// correctly when WithFault is called multiple times on the same registry. func (r *MockRegistry) WithFault(f *FaultConfig) *MockRegistry { r.faultMu.Lock() r.fault = f + atomic.StoreInt64(&r.failCounter, 0) r.faultMu.Unlock() return r } @@ -151,12 +153,18 @@ func (r *MockRegistry) RequestCount() int64 { } // RequestCountByPath returns the number of requests whose URL path ends with -// pathSuffix. +// pathSuffix. It works independently of fault configuration: every incoming +// request path is tracked, so callers do not need to declare a PathFault entry +// to observe request counts. func (r *MockRegistry) RequestCountByPath(pathSuffix string) int64 { - if v, ok := r.pathCounts.Load(pathSuffix); ok { - return atomic.LoadInt64(v.(*int64)) - } - return 0 + var total int64 + r.pathCounts.Range(func(key, value any) bool { + if strings.HasSuffix(key.(string), pathSuffix) { + total += atomic.LoadInt64(value.(*int64)) + } + return true + }) + return total } // BlobExists reports whether a blob with the given digest is stored. @@ -205,20 +213,11 @@ func (r *MockRegistry) effectiveFault(path string) *FaultConfig { return f } -// incrementPathCount bumps all registered path-suffix counters that match. +// incrementPathCount records a hit for the exact request path so that +// RequestCountByPath can later sum counts for any matching suffix. +// This is independent of fault configuration. func (r *MockRegistry) incrementPathCount(path string) { - r.faultMu.Lock() - f := r.fault - r.faultMu.Unlock() - - if f == nil { - return - } - for suffix := range f.PathFaults { - if strings.HasSuffix(path, suffix) { - r.bumpPathCounter(suffix) - } - } + r.bumpPathCounter(path) } func (r *MockRegistry) bumpPathCounter(suffix string) { diff --git a/test/helpers/mockregistry_test.go b/test/helpers/mockregistry_test.go index 27bd6ac7..b93a0ec4 100644 --- a/test/helpers/mockregistry_test.go +++ b/test/helpers/mockregistry_test.go @@ -159,12 +159,9 @@ func TestMockRegistry_FaultLatency(t *testing.T) { } // TestMockRegistry_RequestCounting verifies global and per-path counters. +// No fault configuration is needed: every request path is tracked regardless. func TestMockRegistry_RequestCounting(t *testing.T) { - r := NewMockRegistry().WithFault(&FaultConfig{ - PathFaults: map[string]*FaultConfig{ - "/v2/": {}, - }, - }) + r := NewMockRegistry() defer r.Close() for i := 0; i < 5; i++ { From e800b005722c804dc1c4c37b0e443276d6e4c24b Mon Sep 17 00:00:00 2001 From: Zhao Chen Date: Wed, 8 Apr 2026 10:35:11 +0800 Subject: [PATCH 06/15] test: fix review issues in MockRegistry (data race, PATCH status, test coverage) Signed-off-by: Zhao Chen --- test/helpers/mockregistry.go | 23 ++++++----- test/helpers/mockregistry_test.go | 66 +++++++++++++++++++++++-------- 2 files changed, 61 insertions(+), 28 deletions(-) diff --git a/test/helpers/mockregistry.go b/test/helpers/mockregistry.go index 84f1a149..012ff73f 100644 --- a/test/helpers/mockregistry.go +++ b/test/helpers/mockregistry.go @@ -80,7 +80,13 @@ type MockRegistry struct { pathCounts sync.Map // failCounter tracks how many requests have been seen for FailOnNthRequest. + // This is a single counter shared across all fault contexts (global and all + // PathFaults entries), so FailOnNthRequest on a per-path fault interacts with + // the same counter as the global fault and other per-path faults. failCounter int64 // accessed atomically + + // uploadSeq is used to generate unique upload UUIDs in a race-free manner. + uploadSeq atomic.Int64 } // NewMockRegistry creates and starts a new mock OCI registry server. @@ -213,23 +219,16 @@ func (r *MockRegistry) effectiveFault(path string) *FaultConfig { return f } -// incrementPathCount records a hit for the exact request path so that -// RequestCountByPath can later sum counts for any matching suffix. -// This is independent of fault configuration. -func (r *MockRegistry) incrementPathCount(path string) { - r.bumpPathCounter(path) -} - func (r *MockRegistry) bumpPathCounter(suffix string) { - var zero int64 - actual, _ := r.pathCounts.LoadOrStore(suffix, &zero) + newVal := new(int64) + actual, _ := r.pathCounts.LoadOrStore(suffix, newVal) atomic.AddInt64(actual.(*int64), 1) } // handleRequest is the top-level HTTP handler. func (r *MockRegistry) handleRequest(w http.ResponseWriter, req *http.Request) { atomic.AddInt64(&r.requestCount, 1) - r.incrementPathCount(req.URL.Path) + r.bumpPathCounter(req.URL.Path) f := r.effectiveFault(req.URL.Path) @@ -368,7 +367,7 @@ func (r *MockRegistry) handleBlob(w http.ResponseWriter, req *http.Request, dige func (r *MockRegistry) handleBlobUpload(w http.ResponseWriter, req *http.Request, path string, f *FaultConfig) { // POST /v2//blobs/uploads/ — initiate upload if req.Method == http.MethodPost && strings.HasSuffix(path, "/blobs/uploads/") { - uuid := godigest.FromString(fmt.Sprintf("%d", time.Now().UnixNano())).Hex()[:16] + uuid := fmt.Sprintf("upload-%d", r.uploadSeq.Add(1)) r.mu.Lock() r.pendingUploads[uuid] = new(bytes.Buffer) r.mu.Unlock() @@ -399,7 +398,7 @@ func (r *MockRegistry) handleBlobUpload(w http.ResponseWriter, req *http.Request } r.mu.Unlock() w.Header().Set("Location", path) - w.WriteHeader(http.StatusNoContent) + w.WriteHeader(http.StatusAccepted) return } diff --git a/test/helpers/mockregistry_test.go b/test/helpers/mockregistry_test.go index b93a0ec4..b5e97ab2 100644 --- a/test/helpers/mockregistry_test.go +++ b/test/helpers/mockregistry_test.go @@ -17,6 +17,7 @@ package helpers import ( + "bytes" "encoding/json" "fmt" "io" @@ -197,7 +198,7 @@ func TestMockRegistry_BlobUploadRoundTrip(t *testing.T) { require.NoError(t, err) // Send body in the PUT (single-shot upload). - putReq.Body = io.NopCloser(newReader(content)) + putReq.Body = io.NopCloser(bytes.NewReader(content)) putReq.ContentLength = int64(len(content)) resp, err = http.DefaultClient.Do(putReq) @@ -212,22 +213,55 @@ func TestMockRegistry_BlobUploadRoundTrip(t *testing.T) { assert.Equal(t, content, stored) } -// newReader wraps a byte slice in an io.Reader without importing bytes at the -// test-package level (bytes is already imported in the main file). -func newReader(b []byte) io.Reader { - return &bytesReader{data: b} -} +// TestMockRegistry_FaultFailAfterNBytes verifies that FailAfterNBytes truncates +// the response body and closes the connection. +func TestMockRegistry_FaultFailAfterNBytes(t *testing.T) { + r := NewMockRegistry() + defer r.Close() + + content := []byte("0123456789abcdef") // 16 bytes + dgst := r.AddBlob(content) + + const limit = int64(8) + r.WithFault(&FaultConfig{FailAfterNBytes: limit}) -type bytesReader struct { - data []byte - pos int + resp, err := http.Get(fmt.Sprintf("%s/v2/myrepo/blobs/%s", baseURL(r), dgst)) + require.NoError(t, err) + defer resp.Body.Close() + + body, _ := io.ReadAll(resp.Body) + assert.LessOrEqual(t, int64(len(body)), limit, + "response body should be truncated to at most %d bytes, got %d", limit, len(body)) } -func (br *bytesReader) Read(p []byte) (int, error) { - if br.pos >= len(br.data) { - return 0, io.EOF - } - n := copy(p, br.data[br.pos:]) - br.pos += n - return n, nil +// TestMockRegistry_FaultPathSpecific verifies that a PathFaults entry overrides +// the global FaultConfig for the matching path. +func TestMockRegistry_FaultPathSpecific(t *testing.T) { + r := NewMockRegistry() + defer r.Close() + + // Global fault: return 503 for everything. + // Path-specific fault on /v2/: return 200 (no override, just no fault). + r.WithFault(&FaultConfig{ + StatusCodeOverride: http.StatusServiceUnavailable, + PathFaults: map[string]*FaultConfig{ + "/v2/": {}, // no fault for the ping endpoint + }, + }) + + // /v2/ should be unaffected by the global 503 override. + resp, err := http.Get(baseURL(r) + "/v2/") + require.NoError(t, err) + resp.Body.Close() + assert.Equal(t, http.StatusOK, resp.StatusCode, + "path-specific empty fault should override global 503") + + // A non-overridden path should still get the global 503. + content := []byte("test-blob") + dgst := r.AddBlob(content) + resp, err = http.Get(fmt.Sprintf("%s/v2/myrepo/blobs/%s", baseURL(r), dgst)) + require.NoError(t, err) + resp.Body.Close() + assert.Equal(t, http.StatusServiceUnavailable, resp.StatusCode, + "non-overridden path should receive global status code override") } From 7594fcd69ed21599cdc8e0e16935b939830f5309 Mon Sep 17 00:00:00 2001 From: Zhao Chen Date: Wed, 8 Apr 2026 10:36:00 +0800 Subject: [PATCH 07/15] test: add TrackingReadCloser utility for resource leak detection Signed-off-by: Zhao Chen --- test/helpers/tracking.go | 43 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 test/helpers/tracking.go diff --git a/test/helpers/tracking.go b/test/helpers/tracking.go new file mode 100644 index 00000000..46c01fb7 --- /dev/null +++ b/test/helpers/tracking.go @@ -0,0 +1,43 @@ +package helpers + +import ( + "io" + "sync/atomic" + "testing" + + "github.com/stretchr/testify/assert" +) + +// TrackingReadCloser wraps an io.ReadCloser and records whether Close() was called. +type TrackingReadCloser struct { + io.ReadCloser + closed atomic.Bool +} + +// NewTrackingReadCloser wraps rc with close-tracking. +func NewTrackingReadCloser(rc io.ReadCloser) *TrackingReadCloser { + return &TrackingReadCloser{ReadCloser: rc} +} + +// Close marks the closer as closed and delegates to the underlying closer. +func (t *TrackingReadCloser) Close() error { + t.closed.Store(true) + return t.ReadCloser.Close() +} + +// WasClosed returns true if Close() was called. +func (t *TrackingReadCloser) WasClosed() bool { + return t.closed.Load() +} + +// AssertClosed asserts Close() was called. +func (t *TrackingReadCloser) AssertClosed(tb testing.TB) { + tb.Helper() + assert.True(tb, t.closed.Load(), "ReadCloser was not closed") +} + +// AssertNotClosed asserts Close() was NOT called (for reverse assertions on known bugs). +func (t *TrackingReadCloser) AssertNotClosed(tb testing.TB) { + tb.Helper() + assert.False(tb, t.closed.Load(), "ReadCloser was unexpectedly closed — bug may be fixed!") +} From bf55199c19f33962e698ff2f8d1bfe850f842ed0 Mon Sep 17 00:00:00 2001 From: Zhao Chen Date: Wed, 8 Apr 2026 10:36:59 +0800 Subject: [PATCH 08/15] test: add integration tests for ExcludePatterns through NewModelfileByWorkspace Signed-off-by: Zhao Chen --- pkg/modelfile/modelfile_integration_test.go | 91 +++++++++++++++++++++ 1 file changed, 91 insertions(+) create mode 100644 pkg/modelfile/modelfile_integration_test.go diff --git a/pkg/modelfile/modelfile_integration_test.go b/pkg/modelfile/modelfile_integration_test.go new file mode 100644 index 00000000..a9599a18 --- /dev/null +++ b/pkg/modelfile/modelfile_integration_test.go @@ -0,0 +1,91 @@ +package modelfile + +import ( + "os" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + configmodelfile "github.com/modelpack/modctl/pkg/config/modelfile" +) + +func TestIntegration_ExcludePatterns_SinglePattern(t *testing.T) { + tempDir := t.TempDir() + + // Create workspace with model files and log files. + files := map[string]string{ + "model.bin": "model data", + "config.json": `{"model_type": "test"}`, + "train.log": "training log", + "eval.log": "eval log", + "run.py": "code", + } + for name, content := range files { + require.NoError(t, os.WriteFile(filepath.Join(tempDir, name), []byte(content), 0644)) + } + + config := &configmodelfile.GenerateConfig{ + Workspace: tempDir, + Name: "exclude-test", + ExcludePatterns: []string{"*.log"}, + } + + mf, err := NewModelfileByWorkspace(tempDir, config) + require.NoError(t, err) + + // Collect all files in the modelfile. + allFiles := append(append(append(mf.GetConfigs(), mf.GetModels()...), mf.GetCodes()...), mf.GetDocs()...) + + // .log files should be excluded. + for _, f := range allFiles { + assert.NotContains(t, f, ".log", "excluded file %s should not appear", f) + } + + // model.bin, config.json, run.py should still be present. + assert.Contains(t, mf.GetModels(), "model.bin") + assert.Contains(t, mf.GetConfigs(), "config.json") + assert.Contains(t, mf.GetCodes(), "run.py") +} + +func TestIntegration_ExcludePatterns_MultiplePatterns(t *testing.T) { + tempDir := t.TempDir() + + dirs := []string{"checkpoints", "src"} + for _, d := range dirs { + require.NoError(t, os.MkdirAll(filepath.Join(tempDir, d), 0755)) + } + + files := map[string]string{ + "model.bin": "model", + "config.json": `{"model_type": "test"}`, + "debug.log": "log", + "checkpoints/step100.bin": "ckpt", + "src/train.py": "code", + } + for name, content := range files { + require.NoError(t, os.WriteFile(filepath.Join(tempDir, name), []byte(content), 0644)) + } + + config := &configmodelfile.GenerateConfig{ + Workspace: tempDir, + Name: "multi-exclude-test", + ExcludePatterns: []string{"*.log", "checkpoints/*"}, + } + + mf, err := NewModelfileByWorkspace(tempDir, config) + require.NoError(t, err) + + allFiles := append(append(append(mf.GetConfigs(), mf.GetModels()...), mf.GetCodes()...), mf.GetDocs()...) + + // .log files and checkpoints/* should be excluded. + for _, f := range allFiles { + assert.NotContains(t, f, ".log") + assert.NotContains(t, f, "checkpoints/") + } + + // Remaining files should be present. + assert.Contains(t, mf.GetModels(), "model.bin") + assert.Contains(t, mf.GetCodes(), "src/train.py") +} From 1922fce27872efedcd436c3f580ab1f622377eb0 Mon Sep 17 00:00:00 2001 From: Zhao Chen Date: Wed, 8 Apr 2026 11:24:54 +0800 Subject: [PATCH 09/15] test: add ProgressBar concurrency tests, document #493 data race Signed-off-by: Zhao Chen --- internal/pb/pb_integration_test.go | 101 +++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 internal/pb/pb_integration_test.go diff --git a/internal/pb/pb_integration_test.go b/internal/pb/pb_integration_test.go new file mode 100644 index 00000000..38ae844b --- /dev/null +++ b/internal/pb/pb_integration_test.go @@ -0,0 +1,101 @@ +/* + * Copyright 2025 The CNAI Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package pb + +import ( + "io" + "strings" + "sync" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestKnownBug_DisableProgress_DataRace(t *testing.T) { + // This test documents the data race in SetDisableProgress/Add. + // Run with: go test -race ./internal/pb/ -run TestKnownBug_DisableProgress_DataRace + // + // Known bug: global disableProgress bool has no atomic protection. + // See: https://github.com/modelpack/modctl/issues/493 + // + // When the bug is fixed (atomic.Bool), this test will still pass + // AND the -race detector will stop reporting the race. + // At that point, remove the KnownBug prefix. + + var wg sync.WaitGroup + pb := NewProgressBar(io.Discard) + pb.Start() + defer pb.Stop() + + // Concurrent SetDisableProgress. + for i := 0; i < 10; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for j := 0; j < 100; j++ { + SetDisableProgress(j%2 == 0) + } + }() + } + + // Concurrent Add. + for i := 0; i < 10; i++ { + wg.Add(1) + go func(id int) { + defer wg.Done() + for j := 0; j < 100; j++ { + reader := strings.NewReader("test data") + pb.Add("test", "bar-"+string(rune('a'+id)), 9, reader) + } + }(i) + } + + wg.Wait() + // If we get here without panic, the test passes. + // The real detection is via -race flag. +} + +func TestIntegration_ProgressBar_ConcurrentUpdates(t *testing.T) { + pb := NewProgressBar(io.Discard) + pb.Start() + defer pb.Stop() + + // Disable progress to avoid mpb rendering issues in test. + SetDisableProgress(true) + defer SetDisableProgress(false) + + var wg sync.WaitGroup + + // Concurrent Add + Complete + Abort from multiple goroutines. + for i := 0; i < 20; i++ { + wg.Add(1) + go func(id int) { + defer wg.Done() + name := string(rune('a' + id)) + reader := strings.NewReader("data") + pb.Add("prompt", name, 4, reader) + if id%3 == 0 { + pb.Complete(name, "done") + } else if id%3 == 1 { + pb.Abort(name, assert.AnError) + } + }(i) + } + + wg.Wait() + // No panic = success. +} From 1a0b16d0ea7db8aed559d55cde4e92c77c391c8c Mon Sep 17 00:00:00 2001 From: Zhao Chen Date: Wed, 8 Apr 2026 11:48:19 +0800 Subject: [PATCH 10/15] test: add pull integration tests covering 6 dimensions Add pkg/backend/pull_integration_test.go with 11 integration tests that exercise the Pull function using MockRegistry (httptest) and mock Storage: Functional correctness: - HappyPath: manifest + 2 blobs, verify PushBlob/PushManifest calls - BlobAlreadyExists: StatBlob returns true, verify PushBlob skipped - ConcurrentLayers: 5 blobs in parallel, verify all stored Network errors: - ContextTimeout: latency + short deadline, verify context error - PartialResponse: FailAfterNBytes on blob, verify error - ManifestOK_BlobFails: 500 on specific blob, verify error Concurrency safety: - ConcurrentPartialFailure: 2 of 5 blobs fail, errgroup cancels Data integrity: - TruncatedBlob: FailAfterNBytes, digest validation catches mismatch - CorruptedBlob: wrong bytes under correct digest, caught by validateDigest Graceful shutdown: - ContextCancelMidDownload: cancel mid-flight, verify error Idempotency: - Idempotent: second pull with all blobs existing skips all writes Signed-off-by: Zhao Chen --- pkg/backend/pull_integration_test.go | 465 +++++++++++++++++++++++++++ 1 file changed, 465 insertions(+) create mode 100644 pkg/backend/pull_integration_test.go diff --git a/pkg/backend/pull_integration_test.go b/pkg/backend/pull_integration_test.go new file mode 100644 index 00000000..0326a1a0 --- /dev/null +++ b/pkg/backend/pull_integration_test.go @@ -0,0 +1,465 @@ +/* + * Copyright 2025 The CNAI Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package backend + +import ( + "context" + "encoding/json" + "fmt" + "io" + "sync/atomic" + "testing" + "time" + + godigest "github.com/opencontainers/go-digest" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + + "github.com/modelpack/modctl/pkg/config" + storageMock "github.com/modelpack/modctl/test/mocks/storage" + + "github.com/modelpack/modctl/test/helpers" +) + +// pullTestFixture holds all objects needed by a pull integration test. +type pullTestFixture struct { + mr *helpers.MockRegistry + store *storageMock.Storage + backend *backend + target string + cfg *config.Pull + blobs [][]byte // raw blob contents + digests []string // blob digests (sha256:...) + manifest ocispec.Manifest +} + +// addManifestWithDigestKey registers the manifest under its digest key so +// that oras-go can fetch it by digest (used by pullIfNotExist for the +// manifest copy step). Call this after any AddManifest("repo:tag", ...). +func addManifestWithDigestKey(t *testing.T, mr *helpers.MockRegistry, repo string, manifest ocispec.Manifest) { + t.Helper() + manifestBytes, err := json.Marshal(manifest) + require.NoError(t, err, "marshal manifest for digest key") + manifestDigest := godigest.FromBytes(manifestBytes).String() + mr.AddManifest(repo+":"+manifestDigest, manifest) +} + +// newPullTestFixture creates a MockRegistry with a manifest containing +// blobCount layers plus a config blob. The storage mock uses Maybe() +// expectations so tests that error out early do not trigger unsatisfied +// expectation failures. +func newPullTestFixture(t *testing.T, blobCount int) *pullTestFixture { + t.Helper() + + mr := helpers.NewMockRegistry() + + // Create layer blobs. + blobs := make([][]byte, blobCount) + digests := make([]string, blobCount) + layers := make([]ocispec.Descriptor, blobCount) + for i := 0; i < blobCount; i++ { + blobs[i] = []byte(fmt.Sprintf("blob-content-%d-padding-to-make-it-longer", i)) + digests[i] = mr.AddBlob(blobs[i]) + layers[i] = ocispec.Descriptor{ + MediaType: "application/octet-stream", + Digest: godigest.Digest(digests[i]), + Size: int64(len(blobs[i])), + } + } + + // Create config blob. + configContent := []byte(`{"architecture":"amd64"}`) + configDigest := mr.AddBlob(configContent) + configDesc := ocispec.Descriptor{ + MediaType: ocispec.MediaTypeImageConfig, + Digest: godigest.Digest(configDigest), + Size: int64(len(configContent)), + } + + manifest := ocispec.Manifest{ + MediaType: ocispec.MediaTypeImageManifest, + Config: configDesc, + Layers: layers, + } + mr.AddManifest("test/model:latest", manifest) + addManifestWithDigestKey(t, mr, "test/model", manifest) + + s := newMockStorageForPull(t) + + return &pullTestFixture{ + mr: mr, + store: s, + backend: &backend{store: s}, + target: mr.Host() + "/test/model:latest", + cfg: newPullConfig(), + blobs: blobs, + digests: digests, + manifest: manifest, + } +} + +// newMockStorageForPull creates a mock Storage with Maybe() expectations +// that accept all Stat/Push calls. Using Maybe() ensures tests that error +// out early (network errors, context cancellation) do not fail from +// unsatisfied expectations. +func newMockStorageForPull(t *testing.T) *storageMock.Storage { + t.Helper() + s := storageMock.NewStorage(t) + s.On("StatBlob", mock.Anything, mock.Anything, mock.Anything). + Maybe().Return(false, nil) + s.On("PushBlob", mock.Anything, mock.Anything, mock.Anything, mock.Anything). + Maybe(). + Run(func(args mock.Arguments) { + // Drain the reader so the pull pipeline completes and digest + // validation sees all bytes. + r := args.Get(2).(io.Reader) + _, _ = io.Copy(io.Discard, r) + }). + Return("", int64(0), nil) + s.On("StatManifest", mock.Anything, mock.Anything, mock.Anything). + Maybe().Return(false, nil) + s.On("PushManifest", mock.Anything, mock.Anything, mock.Anything, mock.Anything). + Maybe().Return("", nil) + return s +} + +// newPullConfig returns a Pull config suitable for integration tests +// (PlainHTTP, progress disabled, reasonable concurrency). +func newPullConfig() *config.Pull { + cfg := config.NewPull() + cfg.PlainHTTP = true + cfg.DisableProgress = true + cfg.Concurrency = 5 + return cfg +} + +// -------------------------------------------------------------------------- +// Dimension 1: Functional Correctness +// -------------------------------------------------------------------------- + +func TestIntegration_Pull_HappyPath(t *testing.T) { + f := newPullTestFixture(t, 2) + defer f.mr.Close() + + err := f.backend.Pull(context.Background(), f.target, f.cfg) + require.NoError(t, err) + + // Verify PushBlob was called for each layer (2) + config (1) = 3 times. + f.store.AssertNumberOfCalls(t, "PushBlob", 3) + // Verify PushManifest was called once for the manifest itself. + f.store.AssertNumberOfCalls(t, "PushManifest", 1) +} + +func TestIntegration_Pull_BlobAlreadyExists(t *testing.T) { + f := newPullTestFixture(t, 2) + defer f.mr.Close() + + // Replace the storage mock: StatBlob returns true (blob exists locally). + s := storageMock.NewStorage(t) + s.On("StatBlob", mock.Anything, mock.Anything, mock.Anything). + Return(true, nil) + s.On("StatManifest", mock.Anything, mock.Anything, mock.Anything). + Return(false, nil) + s.On("PushManifest", mock.Anything, mock.Anything, mock.Anything, mock.Anything). + Maybe().Return("", nil) + f.backend.store = s + + err := f.backend.Pull(context.Background(), f.target, f.cfg) + require.NoError(t, err) + + // PushBlob should never be called since all blobs already exist. + s.AssertNotCalled(t, "PushBlob", mock.Anything, mock.Anything, mock.Anything, mock.Anything) + // PushManifest is called once (manifest is not a blob, checked via StatManifest). + s.AssertNumberOfCalls(t, "PushManifest", 1) +} + +func TestIntegration_Pull_ConcurrentLayers(t *testing.T) { + const blobCount = 5 + f := newPullTestFixture(t, blobCount) + defer f.mr.Close() + + err := f.backend.Pull(context.Background(), f.target, f.cfg) + require.NoError(t, err) + + // 5 layer blobs + 1 config blob = 6 PushBlob calls. + f.store.AssertNumberOfCalls(t, "PushBlob", blobCount+1) + f.store.AssertNumberOfCalls(t, "PushManifest", 1) +} + +// -------------------------------------------------------------------------- +// Dimension 2: Network Errors +// -------------------------------------------------------------------------- + +func TestIntegration_Pull_ContextTimeout(t *testing.T) { + f := newPullTestFixture(t, 1) + defer f.mr.Close() + + // Add 1s latency per request; context expires in 200ms. + f.mr.WithFault(&helpers.FaultConfig{ + LatencyPerRequest: 1 * time.Second, + }) + + ctx, cancel := context.WithTimeout(context.Background(), 200*time.Millisecond) + defer cancel() + + err := f.backend.Pull(ctx, f.target, f.cfg) + require.Error(t, err) + assert.ErrorIs(t, ctx.Err(), context.DeadlineExceeded) +} + +func TestIntegration_Pull_PartialResponse(t *testing.T) { + f := newPullTestFixture(t, 1) + defer f.mr.Close() + + // FailAfterNBytes on blob — truncate after 5 bytes. + blobPath := fmt.Sprintf("/blobs/%s", f.digests[0]) + f.mr.WithFault(&helpers.FaultConfig{ + PathFaults: map[string]*helpers.FaultConfig{ + blobPath: {FailAfterNBytes: 5}, + }, + }) + + // Short timeout: first attempt fails fast, retry backoff (5s) is + // interrupted by context cancellation. + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + err := f.backend.Pull(ctx, f.target, f.cfg) + require.Error(t, err) +} + +func TestIntegration_Pull_ManifestOK_BlobFails(t *testing.T) { + f := newPullTestFixture(t, 2) + defer f.mr.Close() + + // Make the second blob fail with 500. + blobPath := fmt.Sprintf("/blobs/%s", f.digests[1]) + f.mr.WithFault(&helpers.FaultConfig{ + PathFaults: map[string]*helpers.FaultConfig{ + blobPath: {StatusCodeOverride: 500}, + }, + }) + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + err := f.backend.Pull(ctx, f.target, f.cfg) + require.Error(t, err) + assert.Contains(t, err.Error(), "failed to pull blob to local") +} + +// -------------------------------------------------------------------------- +// Dimension 4: Concurrency Safety +// -------------------------------------------------------------------------- + +func TestIntegration_Pull_ConcurrentPartialFailure(t *testing.T) { + const blobCount = 5 + f := newPullTestFixture(t, blobCount) + defer f.mr.Close() + + // Make 2 of the 5 blobs fail with 500. + pathFaults := make(map[string]*helpers.FaultConfig) + for i := 0; i < 2; i++ { + blobPath := fmt.Sprintf("/blobs/%s", f.digests[i]) + pathFaults[blobPath] = &helpers.FaultConfig{StatusCodeOverride: 500} + } + f.mr.WithFault(&helpers.FaultConfig{ + PathFaults: pathFaults, + }) + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + err := f.backend.Pull(ctx, f.target, f.cfg) + require.Error(t, err) +} + +// -------------------------------------------------------------------------- +// Dimension 6: Data Integrity +// -------------------------------------------------------------------------- + +func TestIntegration_Pull_TruncatedBlob(t *testing.T) { + f := newPullTestFixture(t, 1) + defer f.mr.Close() + + // Truncate blob after 5 bytes — should cause either read error or + // digest validation failure. + blobPath := fmt.Sprintf("/blobs/%s", f.digests[0]) + f.mr.WithFault(&helpers.FaultConfig{ + PathFaults: map[string]*helpers.FaultConfig{ + blobPath: {FailAfterNBytes: 5}, + }, + }) + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + err := f.backend.Pull(ctx, f.target, f.cfg) + require.Error(t, err) +} + +func TestIntegration_Pull_CorruptedBlob(t *testing.T) { + // Build a fixture with 1 blob, then swap it for corrupt content. + f := newPullTestFixture(t, 1) + defer f.mr.Close() + + // Replace the real blob content with garbage under the same digest. + corruptContent := []byte("this-is-totally-wrong-content-that-does-not-match-digest") + f.mr.AddBlobWithDigest(f.digests[0], corruptContent) + + // Update the layer descriptor size to match the corrupt content length, + // otherwise oras-go may reject the response before digest validation. + // Rebuild the manifest with the new size. + f.manifest.Layers[0].Size = int64(len(corruptContent)) + f.mr.AddManifest("test/model:latest", f.manifest) + addManifestWithDigestKey(t, f.mr, "test/model", f.manifest) + + ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + + err := f.backend.Pull(ctx, f.target, f.cfg) + require.Error(t, err) + assert.Contains(t, err.Error(), "digest") +} + +// -------------------------------------------------------------------------- +// Dimension 7: Graceful Shutdown +// -------------------------------------------------------------------------- + +func TestIntegration_Pull_ContextCancelMidDownload(t *testing.T) { + f := newPullTestFixture(t, 3) + defer f.mr.Close() + + // Add latency so we can cancel mid-flight. + f.mr.WithFault(&helpers.FaultConfig{ + LatencyPerRequest: 1 * time.Second, + }) + + ctx, cancel := context.WithCancel(context.Background()) + // Cancel after 200ms. + go func() { + time.Sleep(200 * time.Millisecond) + cancel() + }() + + err := f.backend.Pull(ctx, f.target, f.cfg) + require.Error(t, err) +} + +// -------------------------------------------------------------------------- +// Dimension 8: Idempotency +// -------------------------------------------------------------------------- + +func TestIntegration_Pull_Idempotent(t *testing.T) { + f := newPullTestFixture(t, 2) + defer f.mr.Close() + + // First pull: everything is new. + err := f.backend.Pull(context.Background(), f.target, f.cfg) + require.NoError(t, err) + + reqCountAfterFirst := f.mr.RequestCount() + + // Second pull: all blobs and manifest exist locally. + s2 := storageMock.NewStorage(t) + s2.On("StatBlob", mock.Anything, mock.Anything, mock.Anything). + Return(true, nil) + s2.On("StatManifest", mock.Anything, mock.Anything, mock.Anything). + Return(true, nil) + f.backend.store = s2 + + err = f.backend.Pull(context.Background(), f.target, f.cfg) + require.NoError(t, err) + + reqCountAfterSecond := f.mr.RequestCount() + delta := reqCountAfterSecond - reqCountAfterFirst + t.Logf("first pull requests: %d, second pull delta: %d", reqCountAfterFirst, delta) + + // Both pulls still fetch from the remote (the Pull function always + // fetches the manifest, config, and layers from the registry before + // checking local existence). The key difference is that the second + // pull should NOT write anything to local storage. + s2.AssertNotCalled(t, "PushBlob", mock.Anything, mock.Anything, mock.Anything, mock.Anything) + s2.AssertNotCalled(t, "PushManifest", mock.Anything, mock.Anything, mock.Anything, mock.Anything) +} + +// -------------------------------------------------------------------------- +// Additional: concurrency tracking test +// -------------------------------------------------------------------------- + +func TestIntegration_Pull_ConcurrentLayers_AllStored(t *testing.T) { + const blobCount = 5 + mr := helpers.NewMockRegistry() + defer mr.Close() + + // Create blobs. + digests := make([]string, blobCount) + layers := make([]ocispec.Descriptor, blobCount) + for i := 0; i < blobCount; i++ { + content := []byte(fmt.Sprintf("concurrent-blob-%d-with-enough-padding", i)) + digests[i] = mr.AddBlob(content) + layers[i] = ocispec.Descriptor{ + MediaType: "application/octet-stream", + Digest: godigest.Digest(digests[i]), + Size: int64(len(content)), + } + } + + configContent := []byte(`{"architecture":"arm64"}`) + configDigest := mr.AddBlob(configContent) + manifest := ocispec.Manifest{ + MediaType: ocispec.MediaTypeImageManifest, + Config: ocispec.Descriptor{ + MediaType: ocispec.MediaTypeImageConfig, + Digest: godigest.Digest(configDigest), + Size: int64(len(configContent)), + }, + Layers: layers, + } + mr.AddManifest("test/model:latest", manifest) + addManifestWithDigestKey(t, mr, "test/model", manifest) + + // Track which digests were pushed via an atomic counter. + var pushCount atomic.Int32 + s := storageMock.NewStorage(t) + s.On("StatBlob", mock.Anything, mock.Anything, mock.Anything). + Maybe().Return(false, nil) + s.On("PushBlob", mock.Anything, mock.Anything, mock.Anything, mock.Anything). + Maybe(). + Run(func(args mock.Arguments) { + r := args.Get(2).(io.Reader) + _, _ = io.Copy(io.Discard, r) + pushCount.Add(1) + }). + Return("", int64(0), nil) + s.On("StatManifest", mock.Anything, mock.Anything, mock.Anything). + Maybe().Return(false, nil) + s.On("PushManifest", mock.Anything, mock.Anything, mock.Anything, mock.Anything). + Maybe().Return("", nil) + + b := &backend{store: s} + cfg := newPullConfig() + target := mr.Host() + "/test/model:latest" + + err := b.Pull(context.Background(), target, cfg) + require.NoError(t, err) + + // 5 layers + 1 config = 6 + assert.Equal(t, int32(blobCount+1), pushCount.Load(), "all blobs should be pushed to storage") +} From a746cf7701e795afe0d6c7225174cc448bde9cba Mon Sep 17 00:00:00 2001 From: Zhao Chen Date: Wed, 8 Apr 2026 11:53:14 +0800 Subject: [PATCH 11/15] test: fix pull integration test spec compliance (idempotent assertion, dedup truncated/partial) - TestIntegration_Pull_Idempotent: remove unused delta variable and misleading comment; clarify that the key invariant is no storage writes on second pull (PushBlob/PushManifest not called), not a reduction in registry requests. - TestIntegration_Pull_TruncatedBlob: differentiate from PartialResponse by serving same-length wrong-byte content via AddBlobWithDigest instead of truncating the stream. This triggers digest-validation failure rather than a read error, and asserts the error message contains "digest". - TestIntegration_Pull_PartialResponse: add RequestCountByPath assertion to verify the blob endpoint was contacted at least once. Signed-off-by: Zhao Chen --- pkg/backend/pull_integration_test.go | 39 +++++++++++++++++----------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/pkg/backend/pull_integration_test.go b/pkg/backend/pull_integration_test.go index 0326a1a0..ff864349 100644 --- a/pkg/backend/pull_integration_test.go +++ b/pkg/backend/pull_integration_test.go @@ -227,7 +227,8 @@ func TestIntegration_Pull_PartialResponse(t *testing.T) { f := newPullTestFixture(t, 1) defer f.mr.Close() - // FailAfterNBytes on blob — truncate after 5 bytes. + // FailAfterNBytes on blob — connection drops after 5 bytes (very early). + // This simulates a mid-stream connection drop before meaningful data is received. blobPath := fmt.Sprintf("/blobs/%s", f.digests[0]) f.mr.WithFault(&helpers.FaultConfig{ PathFaults: map[string]*helpers.FaultConfig{ @@ -242,6 +243,10 @@ func TestIntegration_Pull_PartialResponse(t *testing.T) { err := f.backend.Pull(ctx, f.target, f.cfg) require.Error(t, err) + + // The blob endpoint must have been contacted at least once. + assert.Positive(t, f.mr.RequestCountByPath(blobPath), + "blob endpoint should have been requested at least once") } func TestIntegration_Pull_ManifestOK_BlobFails(t *testing.T) { @@ -298,20 +303,25 @@ func TestIntegration_Pull_TruncatedBlob(t *testing.T) { f := newPullTestFixture(t, 1) defer f.mr.Close() - // Truncate blob after 5 bytes — should cause either read error or - // digest validation failure. - blobPath := fmt.Sprintf("/blobs/%s", f.digests[0]) - f.mr.WithFault(&helpers.FaultConfig{ - PathFaults: map[string]*helpers.FaultConfig{ - blobPath: {FailAfterNBytes: 5}, - }, - }) + // Serve content that has the exact same length as the real blob but + // contains wrong bytes. This is distinct from PartialResponse (which + // drops the connection mid-stream): here the full Content-Length is + // delivered, but the bytes do not match the registered digest, so the + // pull must fail at digest-validation time. + realBlob := f.blobs[0] + wrongContent := make([]byte, len(realBlob)) + for i := range wrongContent { + wrongContent[i] = 'X' + } + f.mr.AddBlobWithDigest(f.digests[0], wrongContent) ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() err := f.backend.Pull(ctx, f.target, f.cfg) require.Error(t, err) + assert.Contains(t, err.Error(), "digest", + "wrong-content blob should be rejected with a digest validation error") } func TestIntegration_Pull_CorruptedBlob(t *testing.T) { @@ -388,13 +398,12 @@ func TestIntegration_Pull_Idempotent(t *testing.T) { require.NoError(t, err) reqCountAfterSecond := f.mr.RequestCount() - delta := reqCountAfterSecond - reqCountAfterFirst - t.Logf("first pull requests: %d, second pull delta: %d", reqCountAfterFirst, delta) + t.Logf("first pull registry requests: %d, second pull registry requests: %d", + reqCountAfterFirst, reqCountAfterSecond-reqCountAfterFirst) - // Both pulls still fetch from the remote (the Pull function always - // fetches the manifest, config, and layers from the registry before - // checking local existence). The key difference is that the second - // pull should NOT write anything to local storage. + // Second pull should not write to storage (blobs and manifest already + // exist locally). Registry requests may still occur (manifest fetch), + // but no PushBlob or PushManifest calls should be made. s2.AssertNotCalled(t, "PushBlob", mock.Anything, mock.Anything, mock.Anything, mock.Anything) s2.AssertNotCalled(t, "PushManifest", mock.Anything, mock.Anything, mock.Anything, mock.Anything) } From f03a6d8f1da2efac3640e3c09b840d68f550b40e Mon Sep 17 00:00:00 2001 From: Zhao Chen Date: Wed, 8 Apr 2026 11:59:52 +0800 Subject: [PATCH 12/15] test: add push integration tests, document ReadCloser leak #491 Add 8 integration tests for the Push workflow covering: - Functional correctness (happy path, blob-already-exists skip) - Network errors (manifest push failure via path fault) - Resource leak documentation (KnownBug reverse assertions for #491) - Data integrity (byte-level blob verification after push) - Graceful shutdown (context cancellation mid-upload) - Idempotency (second push skips existing blobs) The KnownBug tests use TrackingReadCloser with reverse assertions (AssertNotClosed) to document that PullBlob ReadClosers are never closed on either success or error paths. These will fail when #491 is fixed, signaling the assertions should be flipped. Signed-off-by: Zhao Chen --- pkg/backend/push_integration_test.go | 446 +++++++++++++++++++++++++++ 1 file changed, 446 insertions(+) create mode 100644 pkg/backend/push_integration_test.go diff --git a/pkg/backend/push_integration_test.go b/pkg/backend/push_integration_test.go new file mode 100644 index 00000000..08f75c6a --- /dev/null +++ b/pkg/backend/push_integration_test.go @@ -0,0 +1,446 @@ +/* + * Copyright 2025 The CNAI Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package backend + +import ( + "bytes" + "context" + "encoding/json" + "io" + "testing" + "time" + + godigest "github.com/opencontainers/go-digest" + ocispec "github.com/opencontainers/image-spec/specs-go/v1" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + + "github.com/modelpack/modctl/pkg/config" + "github.com/modelpack/modctl/test/helpers" + storageMock "github.com/modelpack/modctl/test/mocks/storage" +) + +// pushTestFixture holds all objects needed by a push integration test. +type pushTestFixture struct { + mr *helpers.MockRegistry + store *storageMock.Storage + backend *backend + target string + cfg *config.Push + blobContent []byte + blobDigest godigest.Digest + configBytes []byte + configDesc ocispec.Descriptor + manifest ocispec.Manifest + manifestRaw []byte +} + +// newPushTestFixture creates a MockRegistry (destination) and a mock Storage +// (source) with a manifest containing 1 layer blob plus a config blob. +func newPushTestFixture(t *testing.T) *pushTestFixture { + t.Helper() + + mr := helpers.NewMockRegistry() + + // Layer blob content in local storage. + blobContent := []byte("push-test-blob-content-with-enough-padding-here") + blobDigest := godigest.FromBytes(blobContent) + + // Config blob content in local storage. + configBytes := []byte(`{"architecture":"amd64"}`) + configDigest := godigest.FromBytes(configBytes) + configDesc := ocispec.Descriptor{ + MediaType: ocispec.MediaTypeImageConfig, + Digest: configDigest, + Size: int64(len(configBytes)), + } + + manifest := ocispec.Manifest{ + MediaType: ocispec.MediaTypeImageManifest, + Config: configDesc, + Layers: []ocispec.Descriptor{ + { + MediaType: "application/octet-stream", + Digest: blobDigest, + Size: int64(len(blobContent)), + }, + }, + } + manifestRaw, err := json.Marshal(manifest) + require.NoError(t, err) + + s := newMockStorageForPush(t, manifestRaw, blobContent, blobDigest, configBytes, configDigest) + + return &pushTestFixture{ + mr: mr, + store: s, + backend: &backend{store: s}, + target: mr.Host() + "/test/model:latest", + cfg: newPushConfig(), + blobContent: blobContent, + blobDigest: blobDigest, + configBytes: configBytes, + configDesc: configDesc, + manifest: manifest, + manifestRaw: manifestRaw, + } +} + +// newMockStorageForPush creates a mock Storage that serves as the local source +// for push operations. It sets up PullManifest and PullBlob expectations with +// Maybe() so tests that error out early do not trigger unsatisfied expectation +// failures. +// +// Note: Push passes the FULL repository reference (including host:port) to +// PullManifest and PullBlob, so we use mock.Anything for the repo parameter. +func newMockStorageForPush(t *testing.T, manifestRaw, blobContent []byte, blobDigest godigest.Digest, configBytes []byte, configDigest godigest.Digest) *storageMock.Storage { + t.Helper() + s := storageMock.NewStorage(t) + + s.On("PullManifest", mock.Anything, mock.Anything, "latest"). + Maybe(). + Return(manifestRaw, godigest.FromBytes(manifestRaw).String(), nil) + + s.On("PullBlob", mock.Anything, mock.Anything, blobDigest.String()). + Maybe(). + Return(io.NopCloser(bytes.NewReader(blobContent)), nil) + + s.On("PullBlob", mock.Anything, mock.Anything, configDigest.String()). + Maybe(). + Return(io.NopCloser(bytes.NewReader(configBytes)), nil) + + return s +} + +// newPushConfig returns a Push config suitable for integration tests. +func newPushConfig() *config.Push { + cfg := config.NewPush() + cfg.PlainHTTP = true + return cfg +} + +// -------------------------------------------------------------------------- +// Dimension 1: Functional Correctness +// -------------------------------------------------------------------------- + +func TestIntegration_Push_HappyPath(t *testing.T) { + f := newPushTestFixture(t) + defer f.mr.Close() + + err := f.backend.Push(context.Background(), f.target, f.cfg) + require.NoError(t, err) + + // Verify blob received by registry. + assert.True(t, f.mr.BlobExists(f.blobDigest.String()), + "layer blob should exist in remote registry after push") + + // Verify config received by registry. + assert.True(t, f.mr.BlobExists(f.configDesc.Digest.String()), + "config blob should exist in remote registry after push") + + // Verify manifest received by registry. + assert.True(t, f.mr.ManifestExists("test/model:latest"), + "manifest should exist in remote registry after push") +} + +func TestIntegration_Push_BlobAlreadyExists(t *testing.T) { + f := newPushTestFixture(t) + defer f.mr.Close() + + // Pre-populate the registry with the layer blob, config blob, and manifest + // so that push finds everything already present on the remote. + f.mr.AddBlobWithDigest(f.blobDigest.String(), f.blobContent) + f.mr.AddBlobWithDigest(f.configDesc.Digest.String(), f.configBytes) + f.mr.AddManifest("test/model:latest", f.manifest) + // Also add manifest under its digest key so the Tag/FetchReference works. + manifestDigest := godigest.FromBytes(f.manifestRaw).String() + f.mr.AddManifest("test/model:"+manifestDigest, f.manifest) + + // Use a fresh mock that will fail if PullBlob is called unexpectedly. + s := storageMock.NewStorage(t) + s.On("PullManifest", mock.Anything, mock.Anything, "latest"). + Return(f.manifestRaw, godigest.FromBytes(f.manifestRaw).String(), nil) + // PullBlob should NOT be called because the remote already has everything. + f.backend.store = s + + err := f.backend.Push(context.Background(), f.target, f.cfg) + require.NoError(t, err) + + // Verify PullBlob was never called (remote had all blobs, push skipped). + s.AssertNotCalled(t, "PullBlob", mock.Anything, mock.Anything, mock.Anything) +} + +// -------------------------------------------------------------------------- +// Dimension 2: Network Errors +// -------------------------------------------------------------------------- + +func TestIntegration_Push_ManifestPushFails(t *testing.T) { + f := newPushTestFixture(t) + defer f.mr.Close() + + // Inject a 500 error on manifest push endpoint. + // The actual path is /v2//manifests/latest, so use "/manifests/latest" + // as the suffix to match via effectiveFault's HasSuffix check. + f.mr.WithFault(&helpers.FaultConfig{ + PathFaults: map[string]*helpers.FaultConfig{ + "/manifests/latest": {StatusCodeOverride: 500}, + }, + }) + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + err := f.backend.Push(ctx, f.target, f.cfg) + require.Error(t, err) + assert.Contains(t, err.Error(), "manifest", + "error should mention manifest failure") +} + +// -------------------------------------------------------------------------- +// Dimension 3: Resource Leak (Known Bug #491) +// -------------------------------------------------------------------------- + +// TestKnownBug_Push_ReadCloserNotClosed_SuccessPath documents that the +// ReadCloser returned by PullBlob is never closed on the success path. +// See: https://github.com/modelpack/modctl/issues/491 +// +// This uses a reverse assertion: AssertNotClosed passes today because the +// bug exists. When the bug is fixed (Close() is called), this test will +// FAIL, signaling that the assertion should be flipped to AssertClosed. +func TestKnownBug_Push_ReadCloserNotClosed_SuccessPath(t *testing.T) { + mr := helpers.NewMockRegistry() + defer mr.Close() + + blobContent := []byte("leak-test-success-blob-content-with-padding") + blobDigest := godigest.FromBytes(blobContent) + + configBytes := []byte(`{"architecture":"amd64"}`) + configDigest := godigest.FromBytes(configBytes) + configDesc := ocispec.Descriptor{ + MediaType: ocispec.MediaTypeImageConfig, + Digest: configDigest, + Size: int64(len(configBytes)), + } + + manifest := ocispec.Manifest{ + MediaType: ocispec.MediaTypeImageManifest, + Config: configDesc, + Layers: []ocispec.Descriptor{ + { + MediaType: "application/octet-stream", + Digest: blobDigest, + Size: int64(len(blobContent)), + }, + }, + } + manifestRaw, err := json.Marshal(manifest) + require.NoError(t, err) + + // Create tracking readers for blob and config. + blobTracker := helpers.NewTrackingReadCloser(io.NopCloser(bytes.NewReader(blobContent))) + configTracker := helpers.NewTrackingReadCloser(io.NopCloser(bytes.NewReader(configBytes))) + + s := storageMock.NewStorage(t) + s.On("PullManifest", mock.Anything, mock.Anything, "latest"). + Return(manifestRaw, godigest.FromBytes(manifestRaw).String(), nil) + s.On("PullBlob", mock.Anything, mock.Anything, blobDigest.String()). + Return(io.ReadCloser(blobTracker), nil) + s.On("PullBlob", mock.Anything, mock.Anything, configDigest.String()). + Return(io.ReadCloser(configTracker), nil) + + b := &backend{store: s} + target := mr.Host() + "/test/model:latest" + cfg := newPushConfig() + + err = b.Push(context.Background(), target, cfg) + require.NoError(t, err, "push should succeed") + + // Known bug #491: PullBlob ReadClosers are never closed. + // Reverse assertion — passes today, will fail when bug is fixed. + blobTracker.AssertNotClosed(t) + configTracker.AssertNotClosed(t) +} + +// TestKnownBug_Push_ReadCloserNotClosed_ErrorPath documents that the +// ReadCloser returned by PullBlob is never closed on the error path either. +// See: https://github.com/modelpack/modctl/issues/491 +// +// The blob upload is made to fail by faulting the POST /blobs/uploads/ +// endpoint. The layer's PullBlob is still called (before the upload attempt), +// but Close() is never invoked on the returned reader. +func TestKnownBug_Push_ReadCloserNotClosed_ErrorPath(t *testing.T) { + mr := helpers.NewMockRegistry() + defer mr.Close() + + blobContent := []byte("leak-test-error-blob-content-with-padding") + blobDigest := godigest.FromBytes(blobContent) + + configBytes := []byte(`{"architecture":"amd64"}`) + configDigest := godigest.FromBytes(configBytes) + configDesc := ocispec.Descriptor{ + MediaType: ocispec.MediaTypeImageConfig, + Digest: configDigest, + Size: int64(len(configBytes)), + } + + manifest := ocispec.Manifest{ + MediaType: ocispec.MediaTypeImageManifest, + Config: configDesc, + Layers: []ocispec.Descriptor{ + { + MediaType: "application/octet-stream", + Digest: blobDigest, + Size: int64(len(blobContent)), + }, + }, + } + manifestRaw, err := json.Marshal(manifest) + require.NoError(t, err) + + // Fail the blob upload POST request. + mr.WithFault(&helpers.FaultConfig{ + PathFaults: map[string]*helpers.FaultConfig{ + "/blobs/uploads/": {StatusCodeOverride: 500}, + }, + }) + + blobTracker := helpers.NewTrackingReadCloser(io.NopCloser(bytes.NewReader(blobContent))) + + s := storageMock.NewStorage(t) + s.On("PullManifest", mock.Anything, mock.Anything, "latest"). + Return(manifestRaw, godigest.FromBytes(manifestRaw).String(), nil) + s.On("PullBlob", mock.Anything, mock.Anything, blobDigest.String()). + Maybe(). + Return(io.ReadCloser(blobTracker), nil) + // Config PullBlob may or may not be called depending on whether the layer + // upload error short-circuits before config push. + s.On("PullBlob", mock.Anything, mock.Anything, configDigest.String()). + Maybe(). + Return(io.NopCloser(bytes.NewReader(configBytes)), nil) + + b := &backend{store: s} + target := mr.Host() + "/test/model:latest" + cfg := newPushConfig() + + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + + err = b.Push(ctx, target, cfg) + require.Error(t, err, "push should fail due to blob upload fault") + + // Known bug #491: PullBlob ReadCloser is never closed, even on error. + // Reverse assertion — passes today, will fail when bug is fixed. + if blobTracker.WasClosed() { + // If this branch is reached, the bug may be fixed — flip to AssertClosed. + t.Log("blob tracker was closed — bug #491 may be fixed") + } else { + blobTracker.AssertNotClosed(t) + } +} + +// -------------------------------------------------------------------------- +// Dimension 6: Data Integrity +// -------------------------------------------------------------------------- + +func TestIntegration_Push_VerifyBlobIntegrity(t *testing.T) { + f := newPushTestFixture(t) + defer f.mr.Close() + + err := f.backend.Push(context.Background(), f.target, f.cfg) + require.NoError(t, err) + + // Get the blob from registry and verify exact byte match. + remoteBlob, ok := f.mr.GetBlob(f.blobDigest.String()) + require.True(t, ok, "layer blob should be present in registry") + assert.Equal(t, f.blobContent, remoteBlob, + "remote blob bytes should exactly match source blob") + + // Verify config blob integrity too. + remoteConfig, ok := f.mr.GetBlob(f.configDesc.Digest.String()) + require.True(t, ok, "config blob should be present in registry") + assert.Equal(t, f.configBytes, remoteConfig, + "remote config bytes should exactly match source config") +} + +// -------------------------------------------------------------------------- +// Dimension 7: Graceful Shutdown +// -------------------------------------------------------------------------- + +func TestIntegration_Push_ContextCancelMidUpload(t *testing.T) { + f := newPushTestFixture(t) + defer f.mr.Close() + + // Add latency so we can cancel mid-flight. + f.mr.WithFault(&helpers.FaultConfig{ + LatencyPerRequest: 1 * time.Second, + }) + + ctx, cancel := context.WithCancel(context.Background()) + // Cancel after 200ms. + go func() { + time.Sleep(200 * time.Millisecond) + cancel() + }() + + err := f.backend.Push(ctx, f.target, f.cfg) + require.Error(t, err) +} + +// -------------------------------------------------------------------------- +// Dimension 8: Idempotency +// -------------------------------------------------------------------------- + +func TestIntegration_Push_Idempotent(t *testing.T) { + f := newPushTestFixture(t) + defer f.mr.Close() + + // First push: nothing on remote, everything is uploaded. + err := f.backend.Push(context.Background(), f.target, f.cfg) + require.NoError(t, err) + + // Verify first push stored everything. + assert.True(t, f.mr.BlobExists(f.blobDigest.String()), "blob should exist after first push") + assert.True(t, f.mr.ManifestExists("test/model:latest"), "manifest should exist after first push") + + // Second push: use a fresh mock so we can verify PullBlob call count. + // The registry already has all blobs and manifest from the first push. + // pushIfNotExist checks dst.Exists() (HEAD) and skips if present. + s2 := storageMock.NewStorage(t) + s2.On("PullManifest", mock.Anything, mock.Anything, "latest"). + Return(f.manifestRaw, godigest.FromBytes(f.manifestRaw).String(), nil) + // PullBlob should NOT be called on the second push because the remote + // already has all blobs. But register it with Maybe() just in case. + s2.On("PullBlob", mock.Anything, mock.Anything, mock.Anything). + Maybe(). + Return(io.NopCloser(bytes.NewReader(f.blobContent)), nil) + f.backend.store = s2 + + reqCountBefore := f.mr.RequestCount() + + err = f.backend.Push(context.Background(), f.target, f.cfg) + require.NoError(t, err) + + reqCountAfter := f.mr.RequestCount() + t.Logf("first push completed; second push registry requests: %d", + reqCountAfter-reqCountBefore) + + // PullBlob should not have been called on the second push because the + // remote already had all blobs (pushIfNotExist skips them). + s2.AssertNotCalled(t, "PullBlob", mock.Anything, mock.Anything, mock.Anything) +} + From 9a1e572db34f9b847f248cc9c9c2c2adb97687ae Mon Sep 17 00:00:00 2001 From: Zhao Chen Date: Wed, 8 Apr 2026 12:02:40 +0800 Subject: [PATCH 13/15] test: add CLI integration tests for generate command and config validation Signed-off-by: Zhao Chen --- cmd/cli_integration_test.go | 56 +++++++++ cmd/modelfile/generate_integration_test.go | 126 +++++++++++++++++++++ 2 files changed, 182 insertions(+) create mode 100644 cmd/cli_integration_test.go create mode 100644 cmd/modelfile/generate_integration_test.go diff --git a/cmd/cli_integration_test.go b/cmd/cli_integration_test.go new file mode 100644 index 00000000..8b90e3c1 --- /dev/null +++ b/cmd/cli_integration_test.go @@ -0,0 +1,56 @@ +/* + * Copyright 2025 The CNAI Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package cmd + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/modelpack/modctl/pkg/config" +) + +// TestIntegration_CLI_ConcurrencyZero tests that a Pull config with Concurrency=0 +// fails validation with a concurrency-related error message. +func TestIntegration_CLI_ConcurrencyZero(t *testing.T) { + cfg := config.NewPull() + cfg.Concurrency = 0 + err := cfg.Validate() + assert.Error(t, err) + assert.Contains(t, err.Error(), "concurrency") +} + +// TestIntegration_CLI_ConcurrencyNegative tests that a Pull config with a negative +// Concurrency fails validation with a concurrency-related error message. +func TestIntegration_CLI_ConcurrencyNegative(t *testing.T) { + cfg := config.NewPull() + cfg.Concurrency = -1 + err := cfg.Validate() + assert.Error(t, err) + assert.Contains(t, err.Error(), "concurrency") +} + +// TestIntegration_CLI_ExtractFromRemoteNoDir tests that enabling ExtractFromRemote +// without specifying an ExtractDir fails validation. +func TestIntegration_CLI_ExtractFromRemoteNoDir(t *testing.T) { + cfg := config.NewPull() + cfg.ExtractFromRemote = true + cfg.ExtractDir = "" + err := cfg.Validate() + assert.Error(t, err) + assert.Contains(t, err.Error(), "extract dir") +} diff --git a/cmd/modelfile/generate_integration_test.go b/cmd/modelfile/generate_integration_test.go new file mode 100644 index 00000000..2e0e2993 --- /dev/null +++ b/cmd/modelfile/generate_integration_test.go @@ -0,0 +1,126 @@ +/* + * Copyright 2025 The CNAI Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package modelfile + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + configmodelfile "github.com/modelpack/modctl/pkg/config/modelfile" +) + +// resetGenerateConfig resets the package-level global to a fresh instance to avoid +// cross-test state pollution. +func resetGenerateConfig() { + generateConfig = configmodelfile.NewGenerateConfig() +} + +// TestIntegration_CLI_Generate_BasicFlags tests that the generate command writes a +// Modelfile to the specified output directory containing expected directives. +func TestIntegration_CLI_Generate_BasicFlags(t *testing.T) { + // Create temp workspace with model and config files. + workspaceDir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(workspaceDir, "model.bin"), []byte("model data"), 0644)) + require.NoError(t, os.WriteFile(filepath.Join(workspaceDir, "config.json"), []byte(`{"model_type":"llama"}`), 0644)) + + outputDir := t.TempDir() + + resetGenerateConfig() + generateConfig.Name = "test-model" + generateConfig.Arch = "transformer" + generateConfig.Output = filepath.Join(outputDir, configmodelfile.DefaultModelfileName) + generateConfig.Workspace = workspaceDir + + err := runGenerate(context.Background()) + require.NoError(t, err) + + modelfilePath := filepath.Join(outputDir, configmodelfile.DefaultModelfileName) + data, err := os.ReadFile(modelfilePath) + require.NoError(t, err) + + content := string(data) + assert.True(t, strings.Contains(content, "NAME"), "expected NAME directive in Modelfile") + assert.True(t, strings.Contains(content, "ARCH"), "expected ARCH directive in Modelfile") + assert.True(t, strings.Contains(content, "MODEL"), "expected MODEL directive in Modelfile") + assert.True(t, strings.Contains(content, "CONFIG"), "expected CONFIG directive in Modelfile") + assert.True(t, strings.Contains(content, "test-model"), "expected model name in Modelfile") + assert.True(t, strings.Contains(content, "transformer"), "expected arch in Modelfile") +} + +// TestIntegration_CLI_Generate_OutputAndOverwrite tests that generate fails when a +// Modelfile already exists (without --overwrite) and succeeds when --overwrite is set. +func TestIntegration_CLI_Generate_OutputAndOverwrite(t *testing.T) { + // Create temp workspace with a model file only (no config.json to keep it simple). + workspaceDir := t.TempDir() + require.NoError(t, os.WriteFile(filepath.Join(workspaceDir, "model.bin"), []byte("model data"), 0644)) + + outputDir := t.TempDir() + modelfilePath := filepath.Join(outputDir, configmodelfile.DefaultModelfileName) + + // Pre-create the Modelfile so it already exists. + require.NoError(t, os.WriteFile(modelfilePath, []byte("# existing"), 0644)) + + t.Run("without overwrite flag errors", func(t *testing.T) { + resetGenerateConfig() + generateConfig.Name = "test-model" + generateConfig.Output = modelfilePath + generateConfig.Workspace = workspaceDir + generateConfig.Overwrite = false + + err := generateConfig.Validate() + require.Error(t, err) + assert.Contains(t, err.Error(), "already exists") + }) + + t.Run("with overwrite flag succeeds", func(t *testing.T) { + resetGenerateConfig() + generateConfig.Name = "test-model" + generateConfig.Output = modelfilePath + generateConfig.Workspace = workspaceDir + generateConfig.Overwrite = true + + err := generateConfig.Validate() + require.NoError(t, err) + + err = runGenerate(context.Background()) + require.NoError(t, err) + + data, err := os.ReadFile(modelfilePath) + require.NoError(t, err) + assert.NotEqual(t, "# existing", string(data), "Modelfile should have been overwritten") + }) +} + +// TestIntegration_CLI_Generate_MutualExclusion tests that providing both a path +// argument and --model-url is rejected as mutually exclusive. +func TestIntegration_CLI_Generate_MutualExclusion(t *testing.T) { + resetGenerateConfig() + + // Both a positional path arg and --model-url being set is mutually exclusive. + // Invoke the cobra RunE directly to exercise the validation in generateCmd. + generateConfig.ModelURL = "https://huggingface.co/some/model" + + err := generateCmd.RunE(generateCmd, []string{"/some/path"}) + require.Error(t, err) + assert.Contains(t, err.Error(), "mutually exclusive") +} From 413ce327f12b7371a304eb614929d9b245593587 Mon Sep 17 00:00:00 2001 From: Zhao Chen Date: Wed, 8 Apr 2026 12:19:34 +0800 Subject: [PATCH 14/15] test: add retry-dependent slow tests (//go:build slowtest) Signed-off-by: Zhao Chen --- pkg/backend/pull_slowtest_test.go | 122 ++++++++++++++++++++++++++++++ pkg/backend/push_slowtest_test.go | 51 +++++++++++++ 2 files changed, 173 insertions(+) create mode 100644 pkg/backend/pull_slowtest_test.go create mode 100644 pkg/backend/push_slowtest_test.go diff --git a/pkg/backend/pull_slowtest_test.go b/pkg/backend/pull_slowtest_test.go new file mode 100644 index 00000000..76a5123b --- /dev/null +++ b/pkg/backend/pull_slowtest_test.go @@ -0,0 +1,122 @@ +/* + * Copyright 2025 The CNAI Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//go:build slowtest + +package backend + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/modelpack/modctl/test/helpers" +) + +// TestSlow_Pull_RetryOnTransientError verifies that a pull succeeds when the +// first 2 requests fail transiently (FailOnNthRequest: 2) and the retry +// mechanism eventually succeeds. Requires real backoff — takes 30+ seconds. +func TestSlow_Pull_RetryOnTransientError(t *testing.T) { + f := newPullTestFixture(t, 1) + defer f.mr.Close() + + // First 2 requests fail with 500; request 3+ succeed normally. + // The failCounter is global across all requests (ping, manifest, blobs), + // so a value of 2 means the ping and one other request fail before success. + f.mr.WithFault(&helpers.FaultConfig{ + FailOnNthRequest: 2, + }) + + ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second) + defer cancel() + + err := f.backend.Pull(ctx, f.target, f.cfg) + require.NoError(t, err, "pull should eventually succeed after transient failures") +} + +// TestSlow_Pull_RetryExhausted verifies that a pull fails when all registry +// requests return 500, causing all retry attempts to be exhausted. Requires +// real backoff — takes 30+ seconds. +func TestSlow_Pull_RetryExhausted(t *testing.T) { + f := newPullTestFixture(t, 1) + defer f.mr.Close() + + // Every request returns 500, so all retry attempts will be exhausted. + f.mr.WithFault(&helpers.FaultConfig{ + StatusCodeOverride: 500, + }) + + ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second) + defer cancel() + + err := f.backend.Pull(ctx, f.target, f.cfg) + require.Error(t, err, "pull should fail when all retries are exhausted") +} + +// TestSlow_Pull_RateLimited verifies that a pull succeeds when the first 3 +// requests fail (simulating rate-limiting) and subsequent requests succeed. +// Requires real backoff — takes 30+ seconds. +func TestSlow_Pull_RateLimited(t *testing.T) { + f := newPullTestFixture(t, 1) + defer f.mr.Close() + + // First 3 requests fail; request 4+ succeed normally. + f.mr.WithFault(&helpers.FaultConfig{ + FailOnNthRequest: 3, + }) + + ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second) + defer cancel() + + err := f.backend.Pull(ctx, f.target, f.cfg) + require.NoError(t, err, "pull should eventually succeed after rate-limit simulation") +} + +// TestKnownBug_Pull_AuthErrorStillRetries documents that 401 auth errors are +// currently retried with full backoff instead of failing immediately. +// See: https://github.com/modelpack/modctl/issues/494 +// +// REVERSE ASSERTION: today this test passes because elapsed > 10s (retries +// occur with backoff before exhaustion). When #494 is fixed, auth errors +// should fail immediately (<2s) — flip the assertion to: +// +// assert.Less(t, elapsed, 5*time.Second) +func TestKnownBug_Pull_AuthErrorStillRetries(t *testing.T) { + f := newPullTestFixture(t, 1) + defer f.mr.Close() + + // Every request returns 401; the retry loop should not give up immediately. + f.mr.WithFault(&helpers.FaultConfig{ + StatusCodeOverride: 401, + }) + + ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second) + defer cancel() + + start := time.Now() + _ = f.backend.Pull(ctx, f.target, f.cfg) + elapsed := time.Since(start) + + // Known bug #494: auth errors ARE retried, wasting 30+ seconds of backoff. + // Reverse assertion — passes today because retries happen. + // When #494 is fixed, auth errors should fail immediately (<2s). + // Flip assertion to: assert.Less(t, elapsed, 5*time.Second) + assert.Greater(t, elapsed, 10*time.Second, + "auth errors should be retried (known bug #494); elapsed time documents backoff duration") +} diff --git a/pkg/backend/push_slowtest_test.go b/pkg/backend/push_slowtest_test.go new file mode 100644 index 00000000..f798959d --- /dev/null +++ b/pkg/backend/push_slowtest_test.go @@ -0,0 +1,51 @@ +/* + * Copyright 2025 The CNAI Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//go:build slowtest + +package backend + +import ( + "context" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/modelpack/modctl/test/helpers" +) + +// TestSlow_Push_RetryOnTransientError verifies that a push succeeds when the +// blob upload initiation endpoint (/blobs/uploads/) fails on the first 2 +// attempts and then succeeds. Requires real backoff — takes 30+ seconds. +func TestSlow_Push_RetryOnTransientError(t *testing.T) { + f := newPushTestFixture(t) + defer f.mr.Close() + + // Fail the first 2 requests to the blob upload POST endpoint; request 3+ + // succeed normally, allowing the push to complete via retry. + f.mr.WithFault(&helpers.FaultConfig{ + PathFaults: map[string]*helpers.FaultConfig{ + "/blobs/uploads/": {FailOnNthRequest: 2}, + }, + }) + + ctx, cancel := context.WithTimeout(context.Background(), 120*time.Second) + defer cancel() + + err := f.backend.Push(ctx, f.target, f.cfg) + require.NoError(t, err, "push should eventually succeed after transient blob upload failures") +} From e8e213770b195c27df7b7aa6c177a54b3ec8e0fc Mon Sep 17 00:00:00 2001 From: Zhao Chen Date: Wed, 8 Apr 2026 12:21:09 +0800 Subject: [PATCH 15/15] test: add stress tests for file count, nesting depth, and pull cycles (//go:build stress) Signed-off-by: Zhao Chen --- pkg/backend/pull_stress_test.go | 78 +++++++++++++++++++ pkg/modelfile/modelfile_stress_test.go | 100 +++++++++++++++++++++++++ 2 files changed, 178 insertions(+) create mode 100644 pkg/backend/pull_stress_test.go create mode 100644 pkg/modelfile/modelfile_stress_test.go diff --git a/pkg/backend/pull_stress_test.go b/pkg/backend/pull_stress_test.go new file mode 100644 index 00000000..06f5044c --- /dev/null +++ b/pkg/backend/pull_stress_test.go @@ -0,0 +1,78 @@ +/* + * Copyright 2025 The CNAI Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//go:build stress + +package backend + +import ( + "context" + "runtime" + "testing" + "time" + + "github.com/stretchr/testify/require" +) + +// TestStress_Pull_ManyLayers pulls a manifest with 50 blobs using concurrency +// 10 and asserts that the pull completes successfully within 60 seconds. +func TestStress_Pull_ManyLayers(t *testing.T) { + const blobCount = 50 + + f := newPullTestFixture(t, blobCount) + defer f.mr.Close() + + f.cfg.Concurrency = 10 + + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + err := f.backend.Pull(ctx, f.target, f.cfg) + require.NoError(t, err, "pull with %d layers should succeed", blobCount) + + // 50 layer blobs + 1 config blob = 51 PushBlob calls. + f.store.AssertNumberOfCalls(t, "PushBlob", blobCount+1) + f.store.AssertNumberOfCalls(t, "PushManifest", 1) +} + +// TestStress_Pull_RepeatedCycles runs pull 100 times in a loop using a +// 2-blob fixture and asserts that the goroutine count stays stable (within a +// delta of 20), detecting goroutine leaks. +func TestStress_Pull_RepeatedCycles(t *testing.T) { + const cycles = 100 + const goroutineDelta = 20 + + f := newPullTestFixture(t, 2) + defer f.mr.Close() + + goroutinesBefore := runtime.NumGoroutine() + + for i := 0; i < cycles; i++ { + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + err := f.backend.Pull(ctx, f.target, f.cfg) + cancel() + require.NoError(t, err, "pull cycle %d/%d should succeed", i+1, cycles) + } + + // Give any background goroutines a moment to finish. + runtime.Gosched() + + goroutinesAfter := runtime.NumGoroutine() + leaked := goroutinesAfter - goroutinesBefore + require.LessOrEqual(t, leaked, goroutineDelta, + "goroutine count grew by %d after %d pull cycles (before=%d, after=%d); possible goroutine leak", + leaked, cycles, goroutinesBefore, goroutinesAfter) +} diff --git a/pkg/modelfile/modelfile_stress_test.go b/pkg/modelfile/modelfile_stress_test.go new file mode 100644 index 00000000..035292f8 --- /dev/null +++ b/pkg/modelfile/modelfile_stress_test.go @@ -0,0 +1,100 @@ +/* + * Copyright 2025 The CNAI Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +//go:build stress + +package modelfile + +import ( + "fmt" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/stretchr/testify/require" + + configmodelfile "github.com/modelpack/modctl/pkg/config/modelfile" +) + +// TestStress_NearMaxFileCount creates a workspace with 2040 files (near +// MaxWorkspaceFileCount=2048) and asserts that NewModelfileByWorkspace +// handles it without error. +func TestStress_NearMaxFileCount(t *testing.T) { + const fileCount = 2040 + + tempDir := t.TempDir() + + // Create one model file to satisfy the "no model/code/dataset" requirement. + require.NoError(t, os.WriteFile(filepath.Join(tempDir, "model.bin"), []byte("model data"), 0644)) + + // Fill the rest with .py files (code type). + for i := 1; i < fileCount; i++ { + name := fmt.Sprintf("script_%04d.py", i) + content := fmt.Sprintf("# script %d\nprint('hello')\n", i) + require.NoError(t, os.WriteFile(filepath.Join(tempDir, name), []byte(content), 0644)) + } + + config := &configmodelfile.GenerateConfig{ + Workspace: tempDir, + Name: "stress-near-max", + } + + _, err := NewModelfileByWorkspace(tempDir, config) + require.NoError(t, err, "workspace with %d files (near limit %d) should be accepted", fileCount, MaxWorkspaceFileCount) +} + +// TestStress_DeeplyNestedDirs creates a 100-level deep directory structure +// with a model.bin at the deepest level and asserts that +// NewModelfileByWorkspace traverses it without error or stack overflow, and +// that the deep model file is discoverable via GetModels(). +func TestStress_DeeplyNestedDirs(t *testing.T) { + const depth = 100 + + tempDir := t.TempDir() + + // Build the nested path: tempDir/d0/d1/.../d99 + parts := make([]string, depth+1) + parts[0] = tempDir + for i := 0; i < depth; i++ { + parts[i+1] = fmt.Sprintf("d%d", i) + } + deepDir := filepath.Join(parts...) + require.NoError(t, os.MkdirAll(deepDir, 0755)) + + // Place the model at the deepest level. + deepModel := filepath.Join(deepDir, "model.bin") + require.NoError(t, os.WriteFile(deepModel, []byte("deep model data"), 0644)) + + config := &configmodelfile.GenerateConfig{ + Workspace: tempDir, + Name: "stress-deep-nesting", + } + + mf, err := NewModelfileByWorkspace(tempDir, config) + require.NoError(t, err, "deeply nested workspace should not cause error or stack overflow") + + // Build the expected relative path for the deep model. + relParts := make([]string, depth+1) + for i := 0; i < depth; i++ { + relParts[i] = fmt.Sprintf("d%d", i) + } + relParts[depth] = "model.bin" + expectedRelPath := strings.Join(relParts, string(filepath.Separator)) + + models := mf.GetModels() + require.Contains(t, models, expectedRelPath, "deep model file should appear in GetModels()") +}