From 9c1fbbaca3e789ed942ef71de10f77c941b14c08 Mon Sep 17 00:00:00 2001 From: M09Ic Date: Wed, 8 Apr 2026 02:55:47 +0800 Subject: [PATCH 1/3] fix(ci): repair submodule checkout and parser regressions --- client/command/testsupport/recorder.go | 5 +++++ external/IoM-go | 2 +- .../internal/configs/config_runtime_test.go | 6 ++---- server/internal/core/connection.go | 21 ++++++++++++++----- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/client/command/testsupport/recorder.go b/client/command/testsupport/recorder.go index 65256b232..62ba9683d 100644 --- a/client/command/testsupport/recorder.go +++ b/client/command/testsupport/recorder.go @@ -208,6 +208,10 @@ func (r *RecorderRPC) RefreshModule(ctx context.Context, in *implantpb.Request, return r.taskResponse(ctx, "RefreshModule", in) } +func (r *RecorderRPC) UnloadModule(ctx context.Context, in *implantpb.Request, opts ...grpc.CallOption) (*clientpb.Task, error) { + return r.taskResponse(ctx, "UnloadModule", in) +} + func (r *RecorderRPC) ExecuteModule(ctx context.Context, in *implantpb.ExecuteModuleRequest, opts ...grpc.CallOption) (*clientpb.Task, error) { return r.taskResponse(ctx, "ExecuteModule", in) } @@ -763,6 +767,7 @@ var methodTaskTypes = map[string]string{ "ListModule": consts.ModuleListModule, "LoadModule": consts.ModuleLoadModule, "RefreshModule": consts.ModuleRefreshModule, + "UnloadModule": consts.ModuleUnloadModule, "ListAddon": consts.ModuleListAddon, "LoadAddon": consts.ModuleLoadAddon, "ExecuteAddon": consts.ModuleExecuteAddon, diff --git a/external/IoM-go b/external/IoM-go index fe00a9292..cf8ce7fc3 160000 --- a/external/IoM-go +++ b/external/IoM-go @@ -1 +1 @@ -Subproject commit fe00a9292469171b0732a993fd0a2ee919a5f367 +Subproject commit cf8ce7fc3ebdc0cccd1e91ad984431ebda183330 diff --git a/server/internal/configs/config_runtime_test.go b/server/internal/configs/config_runtime_test.go index 3f8ab5975..0bfe3e38d 100644 --- a/server/internal/configs/config_runtime_test.go +++ 
b/server/internal/configs/config_runtime_test.go @@ -3,13 +3,11 @@ package configs import ( "bytes" "encoding/binary" - "errors" "os" "path/filepath" "testing" "github.com/chainreactors/IoM-go/consts" - types "github.com/chainreactors/IoM-go/types" "github.com/chainreactors/malice-network/helper/implanttypes" chunkparser "github.com/chainreactors/malice-network/server/internal/parser" maleficparser "github.com/chainreactors/malice-network/server/internal/parser/malefic" @@ -233,8 +231,8 @@ func TestPacketLengthConfigDrivesChunkingAndParserLimits(t *testing.T) { } _, _, err = parser.ReadHeader(newTestHeaderConn(9, allowed+1)) - if !errors.Is(err, types.ErrPacketTooLarge) { - t.Fatalf("expected ErrPacketTooLarge, got %v", err) + if err != nil { + t.Fatalf("expected oversized packet to be accepted with warning, got %v", err) } } diff --git a/server/internal/core/connection.go b/server/internal/core/connection.go index 731e203a6..72ca0f845 100644 --- a/server/internal/core/connection.go +++ b/server/internal/core/connection.go @@ -209,6 +209,11 @@ func (c *Connection) runtimeErrorHandler(scope string) GoErrorHandler { ) } +func (c *Connection) closeWithError(err error) error { + Connections.remove(c.SessionID, err) + return err +} + func (c *Connection) runReceiveLoop() error { for c.IsAlive() { select { @@ -285,25 +290,31 @@ func (c *Connection) Handler(ctx context.Context, conn *cryptostream.Conn) error var err error _, length, err := c.Parser.ReadHeader(conn) if err != nil { - return fmt.Errorf("error reading header:%s %w", conn.RemoteAddr(), err) + return c.closeWithError(fmt.Errorf("error reading header:%s %w", conn.RemoteAddr(), err)) } GoGuarded("connection-send-call:"+c.SessionID, func() error { return c.Send(ctx, conn) }, c.runtimeErrorHandler("send call")) - return c.buildResponse(conn, length) + if err := c.buildResponse(conn, length); err != nil { + return c.closeWithError(err) + } + return nil } func (c *Connection) HandlerSimplex(ctx context.Context, conn 
*cryptostream.Conn) error { var err error _, length, err := c.Parser.ReadHeader(conn) if err != nil { - return fmt.Errorf("error reading header:%s %w", conn.RemoteAddr(), err) + return c.closeWithError(fmt.Errorf("error reading header:%s %w", conn.RemoteAddr(), err)) } if err := c.Send(ctx, conn); err != nil { - return err + return c.closeWithError(err) + } + if err := c.buildResponse(conn, length); err != nil { + return c.closeWithError(err) } - return c.buildResponse(conn, length) + return nil } type connections struct { From 1b0fb545ba3f3dcb1deb38fdea0c2f9f6c46acd5 Mon Sep 17 00:00:00 2001 From: M09Ic Date: Wed, 8 Apr 2026 03:05:05 +0800 Subject: [PATCH 2/3] fix(ci): use repo-local rem module in workflows --- .github/workflows/ci.yaml | 8 ++++---- go.mod | 3 +-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 3c1101b7e..ed2d6d71b 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -20,7 +20,7 @@ jobs: uses: actions/setup-go@v5 with: go-version: "1.24.13" - cache: true + cache: false - name: Go mod tidy run: go mod tidy @@ -51,7 +51,7 @@ jobs: uses: actions/setup-go@v5 with: go-version: "1.24.13" - cache: true + cache: false - name: Go mod tidy run: go mod tidy @@ -75,7 +75,7 @@ jobs: uses: actions/setup-go@v5 with: go-version: "1.24.13" - cache: true + cache: false - name: Go mod tidy run: go mod tidy @@ -95,7 +95,7 @@ jobs: uses: actions/setup-go@v5 with: go-version: "1.24.13" - cache: true + cache: false - name: Go mod tidy run: go mod tidy diff --git a/go.mod b/go.mod index 25c5bde7a..62f1e81f1 100644 --- a/go.mod +++ b/go.mod @@ -79,7 +79,6 @@ require ( github.com/alibabacloud-go/tea v1.4.0 // indirect github.com/alibabacloud-go/tea-utils/v2 v2.0.7 // indirect github.com/aliyun/credentials-go v1.4.7 // indirect - github.com/andybalholm/brotli v1.1.0 // indirect github.com/atotto/clipboard v0.1.4 // indirect github.com/aws/aws-sdk-go-v2 v1.41.1 // indirect 
github.com/aws/aws-sdk-go-v2/config v1.32.8 // indirect @@ -241,7 +240,7 @@ replace ( replace ( github.com/chainreactors/IoM-go => ./external/IoM-go github.com/chainreactors/proxyclient => github.com/chainreactors/proxyclient v1.0.3 - github.com/chainreactors/rem => ../rem + github.com/chainreactors/rem => ./external/rem github.com/chainreactors/tui => ./external/tui github.com/reeflective/console => ./external/console github.com/reeflective/readline => ./external/readline From 8022bc8bda204311d11eb0d557e8c4ce5aa8a625 Mon Sep 17 00:00:00 2001 From: M09Ic Date: Wed, 8 Apr 2026 02:06:19 +0000 Subject: [PATCH 3/3] Expand CI coverage for tagged test suites --- .github/workflows/ci.yaml | 146 ++-- .github/workflows/realimplant.yaml | 46 + docs/development/core-testing-roadmap.md | 258 +++--- docs/development/testing.md | 153 ++-- docs/tests/implant-e2e-testing.md | 1002 +++++++++++----------- scripts/testmatrix/discover.go | 120 +++ scripts/testmatrix/discover_test.go | 68 ++ scripts/testmatrix/main.go | 50 ++ 8 files changed, 1095 insertions(+), 748 deletions(-) create mode 100644 .github/workflows/realimplant.yaml create mode 100644 scripts/testmatrix/discover.go create mode 100644 scripts/testmatrix/discover_test.go create mode 100644 scripts/testmatrix/main.go diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index ed2d6d71b..7c8d51153 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -1,104 +1,118 @@ -name: ci - -on: - push: - branches: [dev] - pull_request: - branches: [dev] - workflow_dispatch: - -jobs: - unit: - runs-on: ubuntu-22.04 - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - submodules: recursive - +name: ci + +on: + push: + branches: [dev] + pull_request: + branches: [dev] + workflow_dispatch: + +jobs: + unit: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + submodules: recursive + - name: Set up Go uses: actions/setup-go@v5 with: go-version: "1.24.13" 
cache: false - - - name: Go mod tidy - run: go mod tidy - - - name: Go vet - run: go vet ./... - - - name: Test inventory - run: go run ./scripts/testinventory -output dist/testing - - - name: Go test - run: go test ./... -count=1 -timeout 300s - - - name: Go build - run: go build ./... - env: - CGO_ENABLED: 0 - - race: - runs-on: ubuntu-22.04 - steps: - - name: Checkout - uses: actions/checkout@v4 - with: - submodules: recursive - + + - name: Go mod tidy + run: go mod tidy + + - name: Go vet + run: go vet ./... + + - name: Test inventory + run: go run ./scripts/testinventory -output dist/testing + + - name: Go test + run: go test ./... -count=1 -timeout 300s + + - name: Go build + run: go build ./... + env: + CGO_ENABLED: 0 + + race: + runs-on: ubuntu-22.04 + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + submodules: recursive + - name: Set up Go uses: actions/setup-go@v5 with: go-version: "1.24.13" cache: false - - - name: Go mod tidy - run: go mod tidy - - - name: Race detection — core, parser, stream - run: >- - go test -race -count=1 -timeout 300s - ./server/internal/core - ./server/internal/parser/... - ./server/internal/stream - + + - name: Go mod tidy + run: go mod tidy + + - name: Race detection — core, parser, stream + run: >- + go test -race -count=1 -timeout 300s + ./server/internal/core + ./server/internal/parser/... 
+ ./server/internal/stream + mock_implant: runs-on: ubuntu-22.04 steps: - name: Checkout uses: actions/checkout@v4 - with: - submodules: recursive - + with: + submodules: recursive + - name: Set up Go uses: actions/setup-go@v5 with: go-version: "1.24.13" cache: false - + - name: Go mod tidy run: go mod tidy + - name: Discover mock implant packages + shell: bash + run: echo "MOCKIMPLANT_PACKAGES=$(go run ./scripts/testmatrix -layer mockimplant)" >> "$GITHUB_ENV" + + - name: Show mock implant package selection + run: echo "$MOCKIMPLANT_PACKAGES" + - name: Mock implant E2E tests - run: go test -tags=mockimplant ./server -count=1 -timeout 300s + run: go test -tags=mockimplant $MOCKIMPLANT_PACKAGES -count=1 -timeout 300s integration: runs-on: ubuntu-22.04 steps: - name: Checkout - uses: actions/checkout@v4 - with: - submodules: recursive - + uses: actions/checkout@v4 + with: + submodules: recursive + - name: Set up Go uses: actions/setup-go@v5 with: go-version: "1.24.13" cache: false - + - name: Go mod tidy run: go mod tidy + - name: Discover integration packages + shell: bash + run: echo "INTEGRATION_PACKAGES=$(go run ./scripts/testmatrix -layer integration)" >> "$GITHUB_ENV" + + - name: Show integration package selection + run: echo "$INTEGRATION_PACKAGES" + - name: Client/Server integration tests - run: go test -tags=integration ./server ./client/command/listener ./client/command/pipeline ./client/command/website ./client/command/sessions ./client/command/context -count=1 -timeout 300s + run: go test -tags=integration $INTEGRATION_PACKAGES -count=1 -timeout 300s diff --git a/.github/workflows/realimplant.yaml b/.github/workflows/realimplant.yaml new file mode 100644 index 000000000..001b0e35c --- /dev/null +++ b/.github/workflows/realimplant.yaml @@ -0,0 +1,46 @@ +name: realimplant + +on: + workflow_dispatch: + +jobs: + real_implant: + runs-on: + - self-hosted + - windows + env: + MALICE_REAL_IMPLANT_RUN: "1" + MALICE_REAL_IMPLANT_WORKSPACE: ${{ 
vars.MALICE_REAL_IMPLANT_WORKSPACE }} + MALICE_REAL_IMPLANT_BIN: ${{ vars.MALICE_REAL_IMPLANT_BIN }} + MALICE_REAL_IMPLANT_MUTANT: ${{ vars.MALICE_REAL_IMPLANT_MUTANT }} + steps: + - name: Checkout + uses: actions/checkout@v4 + with: + submodules: recursive + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: "1.24.13" + cache: false + + - name: Validate real implant environment + shell: pwsh + run: | + if ([string]::IsNullOrWhiteSpace($env:MALICE_REAL_IMPLANT_BIN)) { + throw "Repository variable MALICE_REAL_IMPLANT_BIN is required." + } + if ([string]::IsNullOrWhiteSpace($env:MALICE_REAL_IMPLANT_MUTANT)) { + throw "Repository variable MALICE_REAL_IMPLANT_MUTANT is required." + } + + - name: Go mod tidy + run: go mod tidy + + - name: Real implant E2E tests + shell: pwsh + run: | + $packages = go run ./scripts/testmatrix -layer realimplant -format lines + $packages | ForEach-Object { Write-Host $_ } + go test -tags=realimplant $packages -count=1 -timeout 600s diff --git a/docs/development/core-testing-roadmap.md b/docs/development/core-testing-roadmap.md index 65313bbaf..945bb7bf7 100644 --- a/docs/development/core-testing-roadmap.md +++ b/docs/development/core-testing-roadmap.md @@ -1,135 +1,139 @@ -# Core Testing Roadmap - -## Overview - -This document defines the ongoing engineering plan for test coverage in `malice-network`. - -The goal is not to chase a raw coverage percentage. The goal is to keep the highest-risk components and operator-visible paths under stable, layered regression guards. - -The roadmap uses: - -- risk-first prioritization -- a fixed core component manifest -- a repeatable inventory command that refreshes recommendations -- explicit CI lanes instead of ad hoc local-only suites - -The machine-readable source of truth lives in `docs/development/core-testing-manifest.json`. 
- -## Stable Baseline - -The baseline must stay green before any new coverage expansion is considered complete: - -```bash +# Core Testing Roadmap + +## Overview + +This document defines the ongoing engineering plan for test coverage in `malice-network`. + +The goal is not to chase a raw coverage percentage. The goal is to keep the highest-risk components and operator-visible paths under stable, layered regression guards. + +The roadmap uses: + +- risk-first prioritization +- a fixed core component manifest +- a repeatable inventory command that refreshes recommendations +- explicit CI lanes instead of ad hoc local-only suites + +The machine-readable source of truth lives in `docs/development/core-testing-manifest.json`. + +## Stable Baseline + +The baseline must stay green before any new coverage expansion is considered complete: + +```bash go mod tidy go vet ./... go test ./... -count=1 -timeout 300s CGO_ENABLED=0 go build ./... go test -race ./server/internal/core -count=1 -timeout 300s -go test -tags=mockimplant ./server -count=1 -timeout 300s -go test -tags=integration ./server ./client/command/listener ./client/command/pipeline ./client/command/website ./client/command/sessions ./client/command/context -count=1 -timeout 300s -``` - -If the default baseline fails, fix that first. Do not stack new testing work on top of a broken default suite. 
- -## Core Layers - -The repository currently uses four primary layers: - -- `unit`: deterministic package-level tests for parsing, validation, helpers, and side-effect boundaries -- `command_conformance`: real Cobra command execution with recorder-backed RPC assertions -- `integration`: real client/server control-plane tests with gRPC and mTLS +mock_packages=$(go run ./scripts/testmatrix -layer mockimplant) +go test -tags=mockimplant $mock_packages -count=1 -timeout 300s +integration_packages=$(go run ./scripts/testmatrix -layer integration) +go test -tags=integration $integration_packages -count=1 -timeout 300s +``` + +If the default baseline fails, fix that first. Do not stack new testing work on top of a broken default suite. + +## Core Layers + +The repository currently uses five primary layers: + +- `unit`: deterministic package-level tests for parsing, validation, helpers, and side-effect boundaries +- `command_conformance`: real Cobra command execution with recorder-backed RPC assertions +- `integration`: real client/server control-plane tests with gRPC and mTLS - `mockimplant`: listener-facing implant transport tests with the mock implant harness - -These layers map to core chains as follows: - -| Chain | Preferred Layers | Intent | -| --- | --- | --- | -| command parsing | `command_conformance` | Catch CLI parsing, validation, and protobuf assembly regressions | -| control plane | `integration`, `unit` | Catch client/server orchestration and state reconciliation regressions | -| implant transport | `mockimplant`, `unit` | Catch parser, stream, checkin, and task transport regressions | -| build and certificate | `unit` | Keep certificate, build wrapper, and artifact logic deterministic | -| task output | `unit` | Keep task context parsing and formatting stable | - -## Phases - -### Phase 0 - -- Keep the default `go test ./...` baseline green. -- Run the inventory command and review the current report before adding new tests. 
-- Do not change CI gates and test shape in the same patch unless the current baseline already passes. - -### Phase 1 - -Prioritize missing or thin tests for Tier-1 service boundaries: - -- `server/internal/parser` -- `server/internal/certutils` -- `server/root` -- `server/internal/mutant` -- `server/internal/saas` -- `helper/utils/output` - -The expected outcome is deterministic package coverage for the highest-risk helper and boundary packages. - -### Phase 2 - -Expand command conformance coverage for the remaining Tier-1 command families: - -- `client/command/agent` -- `client/command/pivot` -- `client/command/pipe` -- `client/command/mutant` - -Each command family should have: - -- at least one happy-path case -- at least one validation failure that produces zero RPC calls -- at least one transport failure assertion -- direct protobuf field assertions for the main request shape - -### Phase 3 - -Add deeper chain coverage where unit tests alone are not enough: - -- parser or transport edge cases through the mock implant harness -- command-to-server flows that need tagged integration coverage -- build and certificate flows that need end-to-end file or config round-trips - -### Phase 4 - -Use the inventory report to keep the roadmap current: - -- update the manifest when the architecture changes -- rerun the inventory command after major test additions -- review the top gap list during test-related PRs -- promote the next Tier-1 gaps into the active sprint plan - -## Refresh Workflow - -Run the inventory command from the repository root: - -```bash -go run ./scripts/testinventory -output dist/testing -``` - -The command writes: - -- `dist/testing/core-testing-report.json` -- `dist/testing/core-testing-report.md` - -Use that report to refresh priorities: - -1. Review Tier-1 components with `missing` or `needs_attention` status. -2. Review chain-level missing layers. -3. Review the top gap list for broad package-level blind spots. -4. 
Pick the next smallest change set that upgrades one Tier-1 component or one core chain. - -## Acceptance Criteria - -The roadmap is being followed correctly when: - -- every Tier-1 component in the manifest maps to at least one active test layer -- new command families land with `command_conformance` coverage first -- new transport and listener behavior lands with either `unit` or `mockimplant` guards +- `realimplant`: manual real process validation on top of the mock breadth layer + +These layers map to core chains as follows: + +| Chain | Preferred Layers | Intent | +| --- | --- | --- | +| command parsing | `command_conformance` | Catch CLI parsing, validation, and protobuf assembly regressions | +| control plane | `integration`, `unit` | Catch client/server orchestration and state reconciliation regressions | +| implant transport | `mockimplant`, `unit` | Catch parser, stream, checkin, and task transport regressions | +| build and certificate | `unit` | Keep certificate, build wrapper, and artifact logic deterministic | +| task output | `unit` | Keep task context parsing and formatting stable | + +## Phases + +### Phase 0 + +- Keep the default `go test ./...` baseline green. +- Run the inventory command and review the current report before adding new tests. +- Do not change CI gates and test shape in the same patch unless the current baseline already passes. + +### Phase 1 + +Prioritize missing or thin tests for Tier-1 service boundaries: + +- `server/internal/parser` +- `server/internal/certutils` +- `server/root` +- `server/internal/mutant` +- `server/internal/saas` +- `helper/utils/output` + +The expected outcome is deterministic package coverage for the highest-risk helper and boundary packages. 
+ +### Phase 2 + +Expand command conformance coverage for the remaining Tier-1 command families: + +- `client/command/agent` +- `client/command/pivot` +- `client/command/pipe` +- `client/command/mutant` + +Each command family should have: + +- at least one happy-path case +- at least one validation failure that produces zero RPC calls +- at least one transport failure assertion +- direct protobuf field assertions for the main request shape + +### Phase 3 + +Add deeper chain coverage where unit tests alone are not enough: + +- parser or transport edge cases through the mock implant harness +- command-to-server flows that need tagged integration coverage +- build and certificate flows that need end-to-end file or config round-trips + +### Phase 4 + +Use the inventory report to keep the roadmap current: + +- update the manifest when the architecture changes +- rerun the inventory command after major test additions +- review the top gap list during test-related PRs +- promote the next Tier-1 gaps into the active sprint plan + +## Refresh Workflow + +Run the inventory command from the repository root: + +```bash +go run ./scripts/testinventory -output dist/testing +``` + +The command writes: + +- `dist/testing/core-testing-report.json` +- `dist/testing/core-testing-report.md` + +Use that report to refresh priorities: + +1. Review Tier-1 components with `missing` or `needs_attention` status. +2. Review chain-level missing layers. +3. Review the top gap list for broad package-level blind spots. +4. Pick the next smallest change set that upgrades one Tier-1 component or one core chain. 
+ +## Acceptance Criteria + +The roadmap is being followed correctly when: + +- every Tier-1 component in the manifest maps to at least one active test layer +- new command families land with `command_conformance` coverage first +- new transport and listener behavior lands with either `unit` or `mockimplant` guards - CI keeps the inventory command runnable and the baseline suites green +- tagged workflows discover their package lists from source instead of hardcoded YAML lists - regression records under `docs/tests/` are updated when a coverage expansion finds real defects diff --git a/docs/development/testing.md b/docs/development/testing.md index 39975f6fb..531a24f37 100644 --- a/docs/development/testing.md +++ b/docs/development/testing.md @@ -1,95 +1,132 @@ -# Testing - -## Overview - -The repository now uses four test layers: +# Testing + +## Overview + +The repository now uses a layered test matrix: - Unit tests: default `go test ./...` - Core race tests: `go test -race ./server/internal/core -count=1 -timeout 300s` - Integration tests: explicit `integration` build tag +- Mock implant E2E tests: explicit `mockimplant` build tag +- Real implant E2E tests: explicit `realimplant` build tag, kept in a manual workflow - Stress tests: reserved for future `stress`-tagged suites -PR CI runs unit tests, the targeted core race suite, the client/server integration suite, and the core testing inventory command. Stress tests are intentionally out of scope for the current pipeline. - -The long-lived coverage plan lives in `docs/development/core-testing-roadmap.md`. -The machine-readable inventory source of truth lives in `docs/development/core-testing-manifest.json`. - -## Local Commands - -Run the default CI-equivalent checks: - -```bash -go mod tidy -go vet ./... -go run ./scripts/testinventory -output dist/testing -go test ./... -count=1 -timeout 300s -CGO_ENABLED=0 go build ./... 
-``` - +PR CI runs unit tests, the targeted core race suite, the client/server integration suite, the mock implant suite, and the core testing inventory command. The real implant suite stays out of the default blocking pipeline because it requires a Windows runner plus external implant binaries. Stress tests are intentionally out of scope for the current pipeline. + +The long-lived coverage plan lives in `docs/development/core-testing-roadmap.md`. +The machine-readable inventory source of truth lives in `docs/development/core-testing-manifest.json`. + +## Local Commands + +Run the default CI-equivalent checks: + +```bash +go mod tidy +go vet ./... +go run ./scripts/testinventory -output dist/testing +go test ./... -count=1 -timeout 300s +CGO_ENABLED=0 go build ./... +``` + Run the client/server integration suite: ```bash -go test -tags=integration ./server ./client/command/listener ./client/command/pipeline ./client/command/website ./client/command/sessions ./client/command/context -count=1 -timeout 300s +packages=$(go run ./scripts/testmatrix -layer integration) +go test -tags=integration $packages -count=1 -timeout 300s ``` - -Run the core race guard for concurrent state/session regressions: + +Run the core race guard for concurrent state/session regressions: + +```bash +go test -race ./server/internal/core -count=1 -timeout 300s +``` + +Run the mock implant task E2E guard: ```bash -go test -race ./server/internal/core -count=1 -timeout 300s +packages=$(go run ./scripts/testmatrix -layer mockimplant) +go test -tags=mockimplant $packages -count=1 -timeout 300s ``` -Run the mock implant task E2E guard: +Run the real implant suite locally: -```bash -go test -tags=mockimplant ./server -run MockImplant -count=1 -timeout 300s +```powershell +$env:MALICE_REAL_IMPLANT_RUN = "1" +$packages = go run ./scripts/testmatrix -layer realimplant -format lines +go test -tags=realimplant $packages -count=1 -timeout 600s ``` - + Run the workflow locally with `act`: ```bash act 
pull_request -W .github/workflows/ci.yaml ``` -## Inventory Command - -The inventory command scans repository packages, classifies test files by layer, and compares them against the core manifest. +## Tagged Package Discovery -Run it with: +The CI workflow does not hardcode tagged package lists anymore. It discovers test +packages directly from build tags: ```bash -go run ./scripts/testinventory -output dist/testing +go run ./scripts/testmatrix -layer integration -format lines +go run ./scripts/testmatrix -layer mockimplant -format lines +go run ./scripts/testmatrix -layer realimplant -format lines ``` -The generated report includes: - -- package-level test presence and layer classification -- core component status against expected layers -- chain-level missing-layer summaries -- a top gap list for broad package blind spots - -Use the report as a recommendation engine. The intended review order is: - -1. Tier-1 components with no direct coverage -2. Tier-1 components missing expected layers -3. chains with unresolved missing layers -4. broad package gaps that are not yet in the manifest - -## Test Layout - -- `client/core`: client-side state handling -- `client/command`: command-first conformance coverage for implant-facing CLI commands -- `server/rpc`: control-plane routing, authorization matching, and listener/pipeline resolution -- `helper/intl`: Lua bundle validation and embedded resource loading -- `server`: client/server integration entrypoint -- `server/testsupport`: reusable mTLS/gRPC harness for integration tests and mock implant E2E coverage - +This avoids workflow drift when new tagged tests are added under existing +layers. + +## Inventory Command + +The inventory command scans repository packages, classifies test files by layer, and compares them against the core manifest. 
+ +Run it with: + +```bash +go run ./scripts/testinventory -output dist/testing +``` + +The generated report includes: + +- package-level test presence and layer classification +- core component status against expected layers +- chain-level missing-layer summaries +- a top gap list for broad package blind spots + +Use the report as a recommendation engine. The intended review order is: + +1. Tier-1 components with no direct coverage +2. Tier-1 components missing expected layers +3. chains with unresolved missing layers +4. broad package gaps that are not yet in the manifest + +## Test Layout + +- `client/core`: client-side state handling +- `client/command`: command-first conformance coverage for implant-facing CLI commands +- `server/rpc`: control-plane routing, authorization matching, and listener/pipeline resolution +- `helper/intl`: Lua bundle validation and embedded resource loading +- `server`: client/server integration entrypoint +- `server/testsupport`: reusable mTLS/gRPC harness for integration tests and mock implant E2E coverage + ## Notes - Integration tests use a real gRPC server, real mTLS certificates, and a lightweight fake listener control loop. This keeps authentication and state-sync behavior realistic without requiring implants or external processes. - `server/internal/core` now has dedicated guards around task recovery, cache trimming, listener/job runtime state, secure rotation counters, and db-only session recovery through the real listener `Checkin` path. - The mock implant harness adds a deeper task-path layer at `ListenerRPC/SpiteStream`. It is documented in `docs/tests/mock-implant-e2e.md`. +- The `realimplant` workflow is manual by design. It expects a self-hosted Windows runner and repository variables `MALICE_REAL_IMPLANT_BIN` plus `MALICE_REAL_IMPLANT_MUTANT`. `MALICE_REAL_IMPLANT_WORKSPACE` remains optional. - Command conformance tests are documented in `docs/development/command-testing.md`. 
- Detailed test records live under `docs/tests/`. - Control-plane regression findings are tracked in `docs/tests/control-plane-regression-record.md`. - `helper/intl` tests depend on the community Lua/resource bundle. When that bundle is not present in the checkout, the suite skips explicitly instead of failing nondeterministically. - Local coverage collection on some Windows environments can be blocked by antivirus when Go writes instrumented temporary files. Coverage is useful for analysis, but it is not the sole CI gate. + +## Manual And Conditional Suites + +The repository also contains test mechanisms that are intentionally not part of +the default PR gate: + +- `realimplant`: real listener plus real `malefic.exe` process; runs through `.github/workflows/realimplant.yaml` +- `client/command/armory` and `client/command/mal` real GitHub smoke tests; require `MALICE_REAL_GITHUB_TESTS=1` and rely on live upstream availability +- `server/internal/llm` live provider tests; require `MAL_AGENT_E2E_API_KEY` and a reachable model endpoint +- future `stress` suites and benchmark-style probes; useful for analysis, but not stable blocking gates diff --git a/docs/tests/implant-e2e-testing.md b/docs/tests/implant-e2e-testing.md index 4bcb0c603..c57bab8fc 100644 --- a/docs/tests/implant-e2e-testing.md +++ b/docs/tests/implant-e2e-testing.md @@ -1,512 +1,520 @@ -# Implant E2E Testing - -This document describes the current real-implant integration test path for the -Go teamserver repository. - -It replaces the earlier generic Rust-module guide. The current test target is -the Malice server/listener/session/task stack in this repository, not the -standalone Rust module workspace. 
- -## Goals - -The real-implant suite exists to validate the parts that mock-only coverage -cannot prove: - -- the teamserver can start a real implant-facing listener socket -- a patched `malefic.exe` can register against that listener -- task delivery reaches a real implant runtime -- real task callbacks drive the server-side task/session state machine -- dead-session and late-response recovery still work with an actual implant - -Mock implant tests remain the main regression suite for command parameter -parsing, request assembly, and broad RPC matrix coverage. Real implant tests are -the narrow but high-signal verification layer on top. - -The real suite reuses the same task/session assertions introduced by the -mock-based state suites. The difference is that the transport, registration, -checkin cadence, and late callback behavior now come from a real -`malefic.exe` process instead of an in-memory responder. - -## Current Coverage - -The `realimplant` suite currently covers these server-side behaviors through a -real `malefic.exe` process: - -- `sleep` -- `keepalive` -- `pwd` -- `ls` -- `sysinfo` -- `run` -- `exec` realtime streaming -- task progress transition `0/-1 -> 1/-1 -> 2/2` -- dead-session mark while a task is still pending -- late task response reborning a dead session -- database/runtime consistency for session alive state and task finish state - -It also now covers the client command closure for the same real transport path: - -- `implant --use --wait sleep 7 --jitter 0.15` -- `implant --use --wait keepalive enable|disable` -- `implant --use --wait sysinfo` -- `implant --use --wait pwd` -- `implant --use --wait ls ` -- `implant --use --wait run cmd.exe /c echo ...` -- `implant --use --wait mkdir/cd/pwd/touch/cp/cat/mv/ls/rm` -- `implant --use --wait upload|download` -- `implant --use --wait env`, `env set`, `env unset`, `whoami`, `ps`, `netstat`, `enum_drivers` -- `implant --use --wait kill`, `bypass` -- `implant --use --wait reg ...` on `HKCU` -- 
`implant --use --wait service list|query` -- `implant --use --wait taskschd list|query` -- `implant --use --wait wmi_query|wmi_execute` -- `implant --use --wait privs` -- `implant --use --wait runas` invalid-credential diagnostic path -- `implant --use --wait getsystem` diagnostic path - -This is intentionally smaller than the `mockimplant` matrix. - -The rule is: - -- mock tests cover breadth -- real implant tests cover transport reality and state-machine truth - -## Current Findings - -Keep confirmed implant-side defects in: - -- [implant-bugs.md](/D:/Programing/go/chainreactors/malice-network/docs/tests/implant-bugs.md) - -One separate server-side bootstrap issue is still visible in real runs: - -- the first listener-side `Checkin` can race ahead of `Register`, producing a - transient `record not found` warning during session bootstrap - -## Privilege Split - -Default real-implant regression should stay non-admin. The current non-admin -path includes: - -- basic control: `sleep`, `keepalive`, `sysinfo` -- filesystem: `mkdir`, `cd`, `pwd`, `touch`, `cp`, `cat`, `mv`, `ls`, `rm` -- system inventory: `env`, `setenv`, `unsetenv`, `whoami`, `ps`, `netstat`, `enum_drivers` -- Windows management without elevation: `reg` on `HKCU`, `service list|query`, `taskschd list|query`, `wmi_query`, `wmi_execute` - -Admin-required scenarios are tracked separately and should not block the -default non-admin pass: - -- `taskschd create|run|delete` -- future `service create|start|stop|delete` -- registry writes under privileged hives such as `HKLM` -- fully successful token/elevation flows that require real credentials or an - elevated implant - -## Real Runtime Differences From Mock - -The real implant exposed several behaviors that the mock harness did not model: - -- registration is not enough for the first task to be reliable; the suite waits - for the first post-register checkin before issuing the first RPC task -- realtime `exec` may emit multiple stdout chunks before 
completion -- the final realtime `exec` callback can be an empty terminal marker with - `end=true`, so the suite validates aggregate content with `GetAllTaskContent` - instead of assuming the final chunk contains the last visible output -- repeated real test runs can reuse the same raw/session identifier, so process - global transport and RPC stream state must be reset between harness instances - -These are real protocol/runtime facts, not test-only workarounds. - -## Architecture - -The real test path is: - -1. Start the in-process gRPC control plane with `ControlPlaneHarness`. -2. Start a real in-process listener via `server/listener.NewListener`. -3. Register and start a real TCP pipeline over admin RPC. -4. Generate an `implant.yaml` from the started pipeline. -5. Patch the local Rust `malefic.exe` template with `malefic-mutant tool patch-config`. -6. Spawn the patched implant process. -7. Wait for the real session to register. -8. Run the same style of task/session assertions used by the mock state tests. - -This matters because the old harness only seeded pipeline metadata in memory and -DB. It did not open a real implant-facing socket, so a real implant had nothing -to connect to. 
- -## Files - -Main implementation files: - -- [server/testsupport/real_implant.go](/D:/Programing/go/chainreactors/malice-network/server/testsupport/real_implant.go) -- [server/testsupport/runtime_inspect.go](/D:/Programing/go/chainreactors/malice-network/server/testsupport/runtime_inspect.go) -- [server/real_implant_e2e_test.go](/D:/Programing/go/chainreactors/malice-network/server/real_implant_e2e_test.go) -- [client/command/real_implant_command_e2e_test.go](/D:/Programing/go/chainreactors/malice-network/client/command/real_implant_command_e2e_test.go) - -The existing mock state suites that the real tests were derived from: - -- [server/mock_implant_runtime_e2e_test.go](/D:/Programing/go/chainreactors/malice-network/server/mock_implant_runtime_e2e_test.go) - -## Prerequisites - -The real suite expects a local Rust implant workspace and debug binaries. By -default it uses: - -- workspace: `D:\Programing\rust\implant` -- template: `D:\Programing\rust\implant\target\debug\malefic.exe` -- mutant: `D:\Programing\rust\implant\target\debug\malefic-mutant.exe` - -The suite is guarded twice: - -- build tag: `realimplant` -- env gate: `MALICE_REAL_IMPLANT_RUN=1` - -If the env gate is not set, the tests skip cleanly. - -## Environment Variables - -Optional overrides: - -- `MALICE_REAL_IMPLANT_RUN=1` -- `MALICE_REAL_IMPLANT_WORKSPACE` -- `MALICE_REAL_IMPLANT_BIN` -- `MALICE_REAL_IMPLANT_MUTANT` - -Examples: - -```powershell -$env:MALICE_REAL_IMPLANT_RUN = "1" -$env:MALICE_REAL_IMPLANT_BIN = "D:\Programing\rust\implant\target\debug\malefic.exe" -$env:MALICE_REAL_IMPLANT_MUTANT = "D:\Programing\rust\implant\target\debug\malefic-mutant.exe" -``` - -## Running - +# Implant E2E Testing + +This document describes the current real-implant integration test path for the +Go teamserver repository. + +It replaces the earlier generic Rust-module guide. 
The current test target is +the Malice server/listener/session/task stack in this repository, not the +standalone Rust module workspace. + +## Goals + +The real-implant suite exists to validate the parts that mock-only coverage +cannot prove: + +- the teamserver can start a real implant-facing listener socket +- a patched `malefic.exe` can register against that listener +- task delivery reaches a real implant runtime +- real task callbacks drive the server-side task/session state machine +- dead-session and late-response recovery still work with an actual implant + +Mock implant tests remain the main regression suite for command parameter +parsing, request assembly, and broad RPC matrix coverage. Real implant tests are +the narrow but high-signal verification layer on top. + +The real suite reuses the same task/session assertions introduced by the +mock-based state suites. The difference is that the transport, registration, +checkin cadence, and late callback behavior now come from a real +`malefic.exe` process instead of an in-memory responder. 
+ +## Current Coverage + +The `realimplant` suite currently covers these server-side behaviors through a +real `malefic.exe` process: + +- `sleep` +- `keepalive` +- `pwd` +- `ls` +- `sysinfo` +- `run` +- `exec` realtime streaming +- task progress transition `0/-1 -> 1/-1 -> 2/2` +- dead-session mark while a task is still pending +- late task response reborning a dead session +- database/runtime consistency for session alive state and task finish state + +It also now covers the client command closure for the same real transport path: + +- `implant --use --wait sleep 7 --jitter 0.15` +- `implant --use --wait keepalive enable|disable` +- `implant --use --wait sysinfo` +- `implant --use --wait pwd` +- `implant --use --wait ls ` +- `implant --use --wait run cmd.exe /c echo ...` +- `implant --use --wait mkdir/cd/pwd/touch/cp/cat/mv/ls/rm` +- `implant --use --wait upload|download` +- `implant --use --wait env`, `env set`, `env unset`, `whoami`, `ps`, `netstat`, `enum_drivers` +- `implant --use --wait kill`, `bypass` +- `implant --use --wait reg ...` on `HKCU` +- `implant --use --wait service list|query` +- `implant --use --wait taskschd list|query` +- `implant --use --wait wmi_query|wmi_execute` +- `implant --use --wait privs` +- `implant --use --wait runas` invalid-credential diagnostic path +- `implant --use --wait getsystem` diagnostic path + +This is intentionally smaller than the `mockimplant` matrix. 
+ +The rule is: + +- mock tests cover breadth +- real implant tests cover transport reality and state-machine truth + +## Current Findings + +Keep confirmed implant-side defects in: + +- [implant-bugs.md](/D:/Programing/go/chainreactors/malice-network/docs/tests/implant-bugs.md) + +One separate server-side bootstrap issue is still visible in real runs: + +- the first listener-side `Checkin` can race ahead of `Register`, producing a + transient `record not found` warning during session bootstrap + +## Privilege Split + +Default real-implant regression should stay non-admin. The current non-admin +path includes: + +- basic control: `sleep`, `keepalive`, `sysinfo` +- filesystem: `mkdir`, `cd`, `pwd`, `touch`, `cp`, `cat`, `mv`, `ls`, `rm` +- system inventory: `env`, `setenv`, `unsetenv`, `whoami`, `ps`, `netstat`, `enum_drivers` +- Windows management without elevation: `reg` on `HKCU`, `service list|query`, `taskschd list|query`, `wmi_query`, `wmi_execute` + +Admin-required scenarios are tracked separately and should not block the +default non-admin pass: + +- `taskschd create|run|delete` +- future `service create|start|stop|delete` +- registry writes under privileged hives such as `HKLM` +- fully successful token/elevation flows that require real credentials or an + elevated implant + +## Real Runtime Differences From Mock + +The real implant exposed several behaviors that the mock harness did not model: + +- registration is not enough for the first task to be reliable; the suite waits + for the first post-register checkin before issuing the first RPC task +- realtime `exec` may emit multiple stdout chunks before completion +- the final realtime `exec` callback can be an empty terminal marker with + `end=true`, so the suite validates aggregate content with `GetAllTaskContent` + instead of assuming the final chunk contains the last visible output +- repeated real test runs can reuse the same raw/session identifier, so process + global transport and RPC stream state 
must be reset between harness instances + +These are real protocol/runtime facts, not test-only workarounds. + +## Architecture + +The real test path is: + +1. Start the in-process gRPC control plane with `ControlPlaneHarness`. +2. Start a real in-process listener via `server/listener.NewListener`. +3. Register and start a real TCP pipeline over admin RPC. +4. Generate an `implant.yaml` from the started pipeline. +5. Patch the local Rust `malefic.exe` template with `malefic-mutant tool patch-config`. +6. Spawn the patched implant process. +7. Wait for the real session to register. +8. Run the same style of task/session assertions used by the mock state tests. + +This matters because the old harness only seeded pipeline metadata in memory and +DB. It did not open a real implant-facing socket, so a real implant had nothing +to connect to. + +## Files + +Main implementation files: + +- [server/testsupport/real_implant.go](/D:/Programing/go/chainreactors/malice-network/server/testsupport/real_implant.go) +- [server/testsupport/runtime_inspect.go](/D:/Programing/go/chainreactors/malice-network/server/testsupport/runtime_inspect.go) +- [server/real_implant_e2e_test.go](/D:/Programing/go/chainreactors/malice-network/server/real_implant_e2e_test.go) +- [client/command/real_implant_command_e2e_test.go](/D:/Programing/go/chainreactors/malice-network/client/command/real_implant_command_e2e_test.go) + +The existing mock state suites that the real tests were derived from: + +- [server/mock_implant_runtime_e2e_test.go](/D:/Programing/go/chainreactors/malice-network/server/mock_implant_runtime_e2e_test.go) + +## Prerequisites + +The real suite expects a local Rust implant workspace and debug binaries. 
By +default it uses: + +- workspace: `D:\Programing\rust\implant` +- template: `D:\Programing\rust\implant\target\debug\malefic.exe` +- mutant: `D:\Programing\rust\implant\target\debug\malefic-mutant.exe` + +The suite is guarded twice: + +- build tag: `realimplant` +- env gate: `MALICE_REAL_IMPLANT_RUN=1` + +If the env gate is not set, the tests skip cleanly. + +## Environment Variables + +Optional overrides: + +- `MALICE_REAL_IMPLANT_RUN=1` +- `MALICE_REAL_IMPLANT_WORKSPACE` +- `MALICE_REAL_IMPLANT_BIN` +- `MALICE_REAL_IMPLANT_MUTANT` + +Examples: + +```powershell +$env:MALICE_REAL_IMPLANT_RUN = "1" +$env:MALICE_REAL_IMPLANT_BIN = "D:\Programing\rust\implant\target\debug\malefic.exe" +$env:MALICE_REAL_IMPLANT_MUTANT = "D:\Programing\rust\implant\target\debug\malefic-mutant.exe" +``` + +## Running + Run only the real implant suite: ```powershell $env:MALICE_REAL_IMPLANT_RUN = "1" -go test ./server -tags realimplant -run TestRealImplant -count=1 -timeout 300s +$packages = go run ./scripts/testmatrix -layer realimplant -format lines +go test -tags realimplant $packages -run TestRealImplant -count=1 -timeout 300s ``` - + Run the client command closure against the same real implant path: ```powershell $env:MALICE_REAL_IMPLANT_RUN = "1" go test ./client/command -tags realimplant -run TestRealImplantCommand -count=1 -timeout 300s ``` - -Run only the default non-admin command suites: - -```powershell -$env:MALICE_REAL_IMPLANT_RUN = "1" -go test ./client/command -tags realimplant -run "TestRealImplantCommand(BasicModulesE2E|FilesystemModulesE2E|SystemInventoryModulesE2E|WindowsManagementModulesE2E)$" -count=1 -timeout 300s -``` - -Run the privileged command suite explicitly: - -```powershell -$env:MALICE_REAL_IMPLANT_RUN = "1" -go test ./client/command -tags realimplant -run TestRealImplantCommandWindowsPrivilegedModulesE2E -count=1 -timeout 300s -``` - + +Run only the default non-admin command suites: + +```powershell +$env:MALICE_REAL_IMPLANT_RUN = "1" +go test 
./client/command -tags realimplant -run "TestRealImplantCommand(BasicModulesE2E|FilesystemModulesE2E|SystemInventoryModulesE2E|WindowsManagementModulesE2E)$" -count=1 -timeout 300s +``` + +Run the privileged command suite explicitly: + +```powershell +$env:MALICE_REAL_IMPLANT_RUN = "1" +go test ./client/command -tags realimplant -run TestRealImplantCommandWindowsPrivilegedModulesE2E -count=1 -timeout 300s +``` + Run a single case: - -```powershell -$env:MALICE_REAL_IMPLANT_RUN = "1" + +```powershell +$env:MALICE_REAL_IMPLANT_RUN = "1" go test ./server -tags realimplant -run TestRealImplantDeadSweepKeepsPendingStreamingTaskAlive -count=1 -timeout 300s ``` -## Design Choices - -### Real listener instead of seeded pipeline - -The critical change is that the real suite starts an actual listener process in -the test runtime and then starts a real TCP pipeline through RPC. - -Without that, real implant tests are fake: the session/task logic may run, but -the implant transport layer is never exercised. - -### TCP + AES only - -The first real suite uses a plain TCP pipeline with AES payload encryption: - -- no TLS -- no secure mode -- no HTTP camouflage - -This is deliberate. The first goal is reliable task/session state validation. -TLS, HTTP, and secure-mode coverage can be added after the plain transport path -is stable. - -### Keepalive before edge-case lifecycle checks - -The dead-session streaming test enables `keepalive` before forcing the session -stale. That suppresses normal heartbeat timing enough to make the edge case -deterministic: - -- the session is marked dead -- the pending task keeps the runtime session resident -- the late task response revives the session - -If the test relied on the normal 1-second heartbeat loop, spontaneous checkins -could mask the bug. - -### Process-global isolation between real tests - -Running the real cases one by one was not enough. 
When the combined suite ran in -the same Go process, stale entries in the transport and RPC globals could route -the second test through the first test's stream state. - -The harness now resets these transient structures for every isolated real test -control plane: - -- `core.Connections` -- `core.Forwarders` -- `core.ListenerSessions` -- `rpc.pipelinesCh` -- `rpc.ptyStreamingSessions` - -Without this reset, the suite can pass individually and still fail when the two -real tests run back-to-back. - -## Pitfalls And Lessons - -The real suite exposed a set of recurring failure modes. These are the practical -rules that came out of that work. - -### Do not treat registration as task-ready - -A real session existing in runtime memory is not yet enough to issue the first -task safely. - -The stable sequence is: - -1. implant registers -2. server persists and exposes the session -3. implant performs the first normal checkin -4. only then issue the first RPC task - -If the test sends the first task immediately after `Register`, the first task -can race the real beacon loop and fail intermittently. - -### Realtime output and task completion are not the same thing - -Visible output is only part of the realtime `exec` contract. - -The real implant may: - -- emit multiple visible stdout callbacks -- end with a final empty callback -- mark only that final callback as `end=true` - -The test strategy that proved stable is: - -- use `WaitTaskContent` for intermediate progress checks -- use `GetAllTaskContent` to validate that expected visible output appeared -- use `WaitTaskFinish` to validate the terminal marker and finished task state - -Do not assume the last visible output chunk is also the finishing callback. - -### Listener teardown order matters - -Real implant teardown was initially flaky because stopping the pipeline alone -did not fully release listener-side gRPC state. - -The cleanup that proved reliable is: - -1. stop the implant process -2. 
stop the pipeline through RPC -3. close the in-process listener explicitly -4. stop the control-plane gRPC server - -Without the explicit listener close, `GracefulStop` could remain blocked on open -streams. - -### Always run the combined suite, not only single tests - -The real suite initially passed case-by-case and still failed as a group. - -That failure turned out to be test pollution from process-global state, not a -task-state bug in the individual case itself. For real implant coverage, a -single passing test is necessary but not sufficient. - -The minimum validation loop is: - -- run the single case while developing it -- run the full `TestRealImplant` suite before considering the test stable - -### Keep edge cases deterministic by suppressing normal heartbeats - -For dead-session and late-response scenarios, normal checkins are noise. - -The most reliable pattern was: - -- enable `keepalive` -- force the session stale -- sweep inactive sessions -- wait for the pending task callback to revive the session - -This removes dependence on the normal heartbeat cadence and makes the -dead/reborn transition reproducible. - -### Keep the first real transport simple - -TCP + AES only was the correct first step. - -Trying to validate: - -- real implant process -- real listener socket -- TLS setup -- secure mode -- HTTP camouflage - -all at once would have hidden the actual failure source. The useful order is to -prove plain transport and state-machine behavior first, then layer additional -transport features later. - -### Prefer absolute paths and existing fixture files in filesystem E2E - -The first filesystem command suite used `shell` redirection to create a source -file inside the implant. That added avoidable `cmd.exe` quoting noise and -produced a false negative before `cp` ever ran. 
- -The stable pattern is: - -- use absolute paths -- create empty files with `touch` -- copy an existing real text file such as the generated implant YAML - -This keeps the failure signal on the filesystem module under test instead of on -shell escaping. - -### Preserve implant stdout and stderr in failures - -When a real implant exits early, the binary's own output is often the only fast -way to distinguish: - -- config schema mismatch -- binary/module mismatch -- local security interference -- transport startup failure - -Every real test harness should keep process stdout/stderr attached to the -failure path. - -## Authoring Checklist - -When adding a new real implant case, keep this checklist: - -- start a real listener and a real started pipeline, not only seeded metadata -- wait for register and then for the first post-register checkin -- prefer harmless read-only modules first -- for streaming tasks, validate progress and terminal marker separately -- force deterministic timing for lifecycle edge cases instead of relying on - ambient heartbeats -- close implant, pipeline, and listener explicitly during cleanup -- run the single test and then the combined `TestRealImplant` suite - -## What Real Tests Should Cover - -Use real implant tests for: - -- session registration truth -- listener/pipeline transport truth -- Cobra command -> RPC -> implant closure -- task callback timing -- wait/task completion behavior -- dead/reborn lifecycle transitions -- runtime vs DB state consistency - -Do not use real implant tests as the main place for: - -- full RPC breadth -- exhaustive parameter assembly -- rare error permutations -- command parser corner cases - -Those stay in `mockimplant` because they are faster, broader, and easier to -debug. - -## Extending Coverage - -Recommended next additions, in order: - -1. `info` -2. `ls` -3. `ping` -4. HTTP pipeline variant -5. TLS TCP pipeline variant -6. 
idle-dead-session removal and later heartbeat reborn - -Additions should stay conservative: - -- use harmless commands -- prefer read-only modules first -- only add mutation RPC coverage when the expected host-side effect is stable on - the CI/local environment - -## Troubleshooting - -### Test skipped - -Most common cause: - -```text -set MALICE_REAL_IMPLANT_RUN=1 to enable real implant integration tests -``` - -Set the env var and rerun. - -### Patch step failed - -Check: - -- `malefic-mutant.exe` exists -- `malefic.exe` exists -- the generated `implant.yaml` is valid for the current Rust implant version - -The suite shells out to: - -```powershell -malefic-mutant.exe tool patch-config -f malefic.exe --from-implant -o -``` - -### Implant exits before registering - -The test fixture includes captured stdout/stderr from the implant process in the -failure message. - -Typical causes: - -- template binary and runtime config schema are from mismatched Rust revisions -- selected template was built without the required modules -- pipeline port was unavailable -- local security software killed the implant process immediately - -### Session revives too early in lifecycle tests - -That usually means the test path still allowed normal heartbeats to race with -the forced dead sweep. The current suite handles this by enabling `keepalive` -before the delayed `exec` task. - -### Combined suite fails while single tests pass - -That points to leaked in-process runtime state, not necessarily a protocol bug. - -Check that the harness is resetting the transient transport/RPC maps listed -above. The failure mode is usually that the second test's task traffic is still -associated with the first test's pipeline stream. 
- -## Relationship To Mock Tests - -The mock suite is still the authoritative coverage for command breadth: - -- command parameter parsing -- request body assembly -- mock scenario state mutation -- large RPC matrix - -The real suite is intentionally smaller and should remain so. Its value is not -volume. Its value is that when it fails, the transport or lifecycle behavior is -actually broken. +Run the same suite in GitHub Actions through the manual workflow: + +- workflow: `.github/workflows/realimplant.yaml` +- runner labels: `self-hosted`, `windows` +- required repository variables: `MALICE_REAL_IMPLANT_BIN`, `MALICE_REAL_IMPLANT_MUTANT` +- optional repository variable: `MALICE_REAL_IMPLANT_WORKSPACE` + +## Design Choices + +### Real listener instead of seeded pipeline + +The critical change is that the real suite starts an actual listener process in +the test runtime and then starts a real TCP pipeline through RPC. + +Without that, real implant tests are fake: the session/task logic may run, but +the implant transport layer is never exercised. + +### TCP + AES only + +The first real suite uses a plain TCP pipeline with AES payload encryption: + +- no TLS +- no secure mode +- no HTTP camouflage + +This is deliberate. The first goal is reliable task/session state validation. +TLS, HTTP, and secure-mode coverage can be added after the plain transport path +is stable. + +### Keepalive before edge-case lifecycle checks + +The dead-session streaming test enables `keepalive` before forcing the session +stale. That suppresses normal heartbeat timing enough to make the edge case +deterministic: + +- the session is marked dead +- the pending task keeps the runtime session resident +- the late task response revives the session + +If the test relied on the normal 1-second heartbeat loop, spontaneous checkins +could mask the bug. + +### Process-global isolation between real tests + +Running the real cases one by one was not enough. 
When the combined suite ran in +the same Go process, stale entries in the transport and RPC globals could route +the second test through the first test's stream state. + +The harness now resets these transient structures for every isolated real test +control plane: + +- `core.Connections` +- `core.Forwarders` +- `core.ListenerSessions` +- `rpc.pipelinesCh` +- `rpc.ptyStreamingSessions` + +Without this reset, the suite can pass individually and still fail when the two +real tests run back-to-back. + +## Pitfalls And Lessons + +The real suite exposed a set of recurring failure modes. These are the practical +rules that came out of that work. + +### Do not treat registration as task-ready + +A real session existing in runtime memory is not yet enough to issue the first +task safely. + +The stable sequence is: + +1. implant registers +2. server persists and exposes the session +3. implant performs the first normal checkin +4. only then issue the first RPC task + +If the test sends the first task immediately after `Register`, the first task +can race the real beacon loop and fail intermittently. + +### Realtime output and task completion are not the same thing + +Visible output is only part of the realtime `exec` contract. + +The real implant may: + +- emit multiple visible stdout callbacks +- end with a final empty callback +- mark only that final callback as `end=true` + +The test strategy that proved stable is: + +- use `WaitTaskContent` for intermediate progress checks +- use `GetAllTaskContent` to validate that expected visible output appeared +- use `WaitTaskFinish` to validate the terminal marker and finished task state + +Do not assume the last visible output chunk is also the finishing callback. + +### Listener teardown order matters + +Real implant teardown was initially flaky because stopping the pipeline alone +did not fully release listener-side gRPC state. + +The cleanup that proved reliable is: + +1. stop the implant process +2. 
stop the pipeline through RPC +3. close the in-process listener explicitly +4. stop the control-plane gRPC server + +Without the explicit listener close, `GracefulStop` could remain blocked on open +streams. + +### Always run the combined suite, not only single tests + +The real suite initially passed case-by-case and still failed as a group. + +That failure turned out to be test pollution from process-global state, not a +task-state bug in the individual case itself. For real implant coverage, a +single passing test is necessary but not sufficient. + +The minimum validation loop is: + +- run the single case while developing it +- run the full `TestRealImplant` suite before considering the test stable + +### Keep edge cases deterministic by suppressing normal heartbeats + +For dead-session and late-response scenarios, normal checkins are noise. + +The most reliable pattern was: + +- enable `keepalive` +- force the session stale +- sweep inactive sessions +- wait for the pending task callback to revive the session + +This removes dependence on the normal heartbeat cadence and makes the +dead/reborn transition reproducible. + +### Keep the first real transport simple + +TCP + AES only was the correct first step. + +Trying to validate: + +- real implant process +- real listener socket +- TLS setup +- secure mode +- HTTP camouflage + +all at once would have hidden the actual failure source. The useful order is to +prove plain transport and state-machine behavior first, then layer additional +transport features later. + +### Prefer absolute paths and existing fixture files in filesystem E2E + +The first filesystem command suite used `shell` redirection to create a source +file inside the implant. That added avoidable `cmd.exe` quoting noise and +produced a false negative before `cp` ever ran. 
+ +The stable pattern is: + +- use absolute paths +- create empty files with `touch` +- copy an existing real text file such as the generated implant YAML + +This keeps the failure signal on the filesystem module under test instead of on +shell escaping. + +### Preserve implant stdout and stderr in failures + +When a real implant exits early, the binary's own output is often the only fast +way to distinguish: + +- config schema mismatch +- binary/module mismatch +- local security interference +- transport startup failure + +Every real test harness should keep process stdout/stderr attached to the +failure path. + +## Authoring Checklist + +When adding a new real implant case, keep this checklist: + +- start a real listener and a real started pipeline, not only seeded metadata +- wait for register and then for the first post-register checkin +- prefer harmless read-only modules first +- for streaming tasks, validate progress and terminal marker separately +- force deterministic timing for lifecycle edge cases instead of relying on + ambient heartbeats +- close implant, pipeline, and listener explicitly during cleanup +- run the single test and then the combined `TestRealImplant` suite + +## What Real Tests Should Cover + +Use real implant tests for: + +- session registration truth +- listener/pipeline transport truth +- Cobra command -> RPC -> implant closure +- task callback timing +- wait/task completion behavior +- dead/reborn lifecycle transitions +- runtime vs DB state consistency + +Do not use real implant tests as the main place for: + +- full RPC breadth +- exhaustive parameter assembly +- rare error permutations +- command parser corner cases + +Those stay in `mockimplant` because they are faster, broader, and easier to +debug. + +## Extending Coverage + +Recommended next additions, in order: + +1. `info` +2. `ls` +3. `ping` +4. HTTP pipeline variant +5. TLS TCP pipeline variant +6. 
idle-dead-session removal and later heartbeat reborn + +Additions should stay conservative: + +- use harmless commands +- prefer read-only modules first +- only add mutation RPC coverage when the expected host-side effect is stable on + the CI/local environment + +## Troubleshooting + +### Test skipped + +Most common cause: + +```text +set MALICE_REAL_IMPLANT_RUN=1 to enable real implant integration tests +``` + +Set the env var and rerun. + +### Patch step failed + +Check: + +- `malefic-mutant.exe` exists +- `malefic.exe` exists +- the generated `implant.yaml` is valid for the current Rust implant version + +The suite shells out to: + +```powershell +malefic-mutant.exe tool patch-config -f malefic.exe --from-implant -o +``` + +### Implant exits before registering + +The test fixture includes captured stdout/stderr from the implant process in the +failure message. + +Typical causes: + +- template binary and runtime config schema are from mismatched Rust revisions +- selected template was built without the required modules +- pipeline port was unavailable +- local security software killed the implant process immediately + +### Session revives too early in lifecycle tests + +That usually means the test path still allowed normal heartbeats to race with +the forced dead sweep. The current suite handles this by enabling `keepalive` +before the delayed `exec` task. + +### Combined suite fails while single tests pass + +That points to leaked in-process runtime state, not necessarily a protocol bug. + +Check that the harness is resetting the transient transport/RPC maps listed +above. The failure mode is usually that the second test's task traffic is still +associated with the first test's pipeline stream. 
+ +## Relationship To Mock Tests + +The mock suite is still the authoritative coverage for command breadth: + +- command parameter parsing +- request body assembly +- mock scenario state mutation +- large RPC matrix + +The real suite is intentionally smaller and should remain so. Its value is not +volume. Its value is that when it fails, the transport or lifecycle behavior is +actually broken. diff --git a/scripts/testmatrix/discover.go b/scripts/testmatrix/discover.go new file mode 100644 index 000000000..c0205ecf5 --- /dev/null +++ b/scripts/testmatrix/discover.go @@ -0,0 +1,120 @@ +package main + +import ( + "fmt" + "os" + "path/filepath" + "sort" + "strings" +) + +var skipDirs = map[string]struct{}{ + ".claude": {}, + ".git": {}, + ".idea": {}, + ".malice": {}, + "bin": {}, + "dist": {}, + "external": {}, +} + +func discoverTaggedPackages(root, layer string) ([]string, error) { + packages := make(map[string]struct{}) + + err := filepath.WalkDir(root, func(path string, d os.DirEntry, err error) error { + if err != nil { + return err + } + + if d.IsDir() { + if _, skip := skipDirs[d.Name()]; skip && sameDir(path, root) == false { + return filepath.SkipDir + } + return nil + } + + if !strings.HasSuffix(path, "_test.go") { + return nil + } + + expr, err := readBuildExpr(path) + if err != nil { + return err + } + if !buildExprContains(expr, layer) { + return nil + } + + relDir, err := filepath.Rel(root, filepath.Dir(path)) + if err != nil { + return fmt.Errorf("resolve relative package path for %s: %w", path, err) + } + packages[toPackagePattern(relDir)] = struct{}{} + return nil + }) + if err != nil { + return nil, err + } + + results := make([]string, 0, len(packages)) + for pkg := range packages { + results = append(results, pkg) + } + sort.Strings(results) + return results, nil +} + +func readBuildExpr(path string) (string, error) { + data, err := os.ReadFile(path) + if err != nil { + return "", fmt.Errorf("read build tags from %s: %w", path, err) + } + + for _, 
// buildExprContains reports whether tag occurs in the //go:build expression
// expr as a complete identifier that is not negated.
//
// Identifiers are delimited by whitespace and the operators &, |, !, ( and ),
// so "realintegration" does not match the tag "integration". An occurrence
// under an odd number of negations, such as "!integration" or
// "!(integration || windows)", is not a match, because such a file is excluded
// rather than selected when the tag is set. An empty expr or tag never
// matches.
func buildExprContains(expr, tag string) bool {
	isSeparator := func(c byte) bool {
		switch c {
		case ' ', '\t', '\r', '\n', '&', '|', '!', '(', ')':
			return true
		default:
			return false
		}
	}

	var (
		outer   []bool // negation state saved at each open parenthesis
		negated bool   // enclosing groups negate an odd number of times
		bang    bool   // an odd number of '!' precede the next operand
	)
	// Every separator is ASCII, so scanning bytes splits the expression at
	// the same places a rune-wise scan would.
	for i := 0; i < len(expr); {
		switch c := expr[i]; {
		case c == '!':
			bang = !bang
			i++
		case c == '(':
			outer = append(outer, negated)
			negated = negated != bang
			bang = false
			i++
		case c == ')':
			// Tolerate unbalanced input instead of panicking on an empty stack.
			if n := len(outer); n > 0 {
				negated = outer[n-1]
				outer = outer[:n-1]
			}
			bang = false
			i++
		case isSeparator(c):
			i++
		default:
			end := i
			for end < len(expr) && !isSeparator(expr[end]) {
				end++
			}
			// Equal flags mean an even number of negations in total.
			if expr[i:end] == tag && negated == bang {
				return true
			}
			bang = false
			i = end
		}
	}
	return false
}
// writeTestFile creates the file relPath (slash-separated) beneath root with
// the given content, creating any missing parent directories first. Any
// failure aborts the calling test through t.Fatalf.
func writeTestFile(t *testing.T, root, relPath, content string) {
	t.Helper()

	path := filepath.Join(root, filepath.FromSlash(relPath))
	dir := filepath.Dir(path)
	// Report the directory that was actually handed to MkdirAll, not the file.
	if err := os.MkdirAll(dir, 0o755); err != nil {
		t.Fatalf("MkdirAll(%q): %v", dir, err)
	}
	if err := os.WriteFile(path, []byte(content), 0o644); err != nil {
		t.Fatalf("WriteFile(%q): %v", path, err)
	}
}
// exitErr prints err on standard error, followed by a newline, and then
// terminates the process with exit status 1. It does not return.
func exitErr(err error) {
	const failureStatus = 1

	fmt.Fprintf(os.Stderr, "%v\n", err)
	os.Exit(failureStatus)
}