From d7020ab91d4af6f0d4d97e6de643ece29fb94c93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Otto=20Boly=C3=B3s?= Date: Wed, 3 Jun 2026 06:58:56 +0200 Subject: [PATCH 1/7] test(docs-tests): pin route-shard distribution contract Add ShardRoutes contract tests pinning round-robin distribution across N shards, the union+disjointness partition invariants, the within-shard order preservation, and the argument-validation failure modes (null routes, non-positive total, out-of-range index). The implementation arrives in the follow-up commit; this commit is the RED step and intentionally does not compile. --- .../RouteCheckHelpersTests.cs | 232 ++++++++++++++++++ 1 file changed, 232 insertions(+) diff --git a/tests/MTConnect.NET-Docs-Tests/RouteCheckHelpersTests.cs b/tests/MTConnect.NET-Docs-Tests/RouteCheckHelpersTests.cs index d4ac4d646..8eb235240 100644 --- a/tests/MTConnect.NET-Docs-Tests/RouteCheckHelpersTests.cs +++ b/tests/MTConnect.NET-Docs-Tests/RouteCheckHelpersTests.cs @@ -552,6 +552,238 @@ public void RepoRootMaxAncestorDepth_IsAt32() Assert.That(RouteCheckHelpers.RepoRootMaxAncestorDepth, Is.EqualTo(32)); } + // ─── ShardRoutes ───────────────────────────────────────────────────────── + + /// + /// Sample route set used by the sharding tests below. Twenty routes + /// in lexical order is large enough to make per-shard counts and + /// per-shard membership observable, small enough to enumerate by + /// hand in the assertions. + /// + private static readonly IReadOnlyList SampleRoutes = new[] + { + "/a", "/b", "/c", "/d", "/e", + "/f", "/g", "/h", "/i", "/j", + "/k", "/l", "/m", "/n", "/o", + "/p", "/q", "/r", "/s", "/t", + }; + + /// + /// A 20-route list partitioned across four shards yields exactly + /// five routes per shard (round-robin distribution). 
Pins the + /// even-division contract by count only — a contiguous-chunk + /// policy would also yield 5/5/5/5 here, just with different + /// membership, so this test cannot tell the two apart; the + /// membership invariants below cover the distinction. + /// + [TestCase(1, 4, ExpectedResult = 5)] + [TestCase(2, 4, ExpectedResult = 5)] + [TestCase(3, 4, ExpectedResult = 5)] + [TestCase(4, 4, ExpectedResult = 5)] + public int ShardRoutes_DistributesEvenly_AcrossFourShards(int index, int total) + { + return RouteCheckHelpers.ShardRoutes(SampleRoutes, index, total).Count; + } + + /// + /// An 11-route list partitioned across four shards yields 3, 3, 3, 2 + /// — the round-robin remainder lands on the earliest shards. Pins + /// the remainder-handling contract so a future refactor that + /// piled the whole remainder onto one shard (e.g. 2, 2, 2, 5) would surface here. + /// + [TestCase(1, 4, ExpectedResult = 3)] + [TestCase(2, 4, ExpectedResult = 3)] + [TestCase(3, 4, ExpectedResult = 3)] + [TestCase(4, 4, ExpectedResult = 2)] + public int ShardRoutes_DistributesRemainder_AcrossFourShards(int index, int total) + { + var routes = SampleRoutes.Take(11).ToList(); + return RouteCheckHelpers.ShardRoutes(routes, index, total).Count; + } + + /// + /// Shard 1 of 1 returns the full input — the no-sharding identity + /// path. Local invocations without + /// ROUTE_SHARD_INDEX / ROUTE_SHARD_TOTAL default to + /// this case so a developer running dotnet test still + /// exercises every route. + /// + [Test] + public void ShardRoutes_Shard1Of1_ReturnsAllRoutes() + { + var shard = RouteCheckHelpers.ShardRoutes(SampleRoutes, 1, 1); + Assert.That(shard, Is.EqualTo(SampleRoutes)); + } + + /// + /// The concatenation of every shard (in index order) covers every + /// input route exactly once. Pins the partition invariant: no + /// route is dropped, no route is double-walked. 
+ /// + [Test] + public void ShardRoutes_UnionAcrossShards_CoversEveryRoute() + { + const int total = 4; + var union = new List(); + for (var i = 1; i <= total; i++) + { + union.AddRange(RouteCheckHelpers.ShardRoutes(SampleRoutes, i, total)); + } + + Assert.That(union, Is.EquivalentTo(SampleRoutes), + "the union of all shards must equal the input route set"); + Assert.That(union, Has.Count.EqualTo(SampleRoutes.Count), + "no route may appear in more than one shard"); + } + + /// + /// Any pair of shards is disjoint. Pins the partition invariant + /// from the membership side — a regression that produced + /// overlapping shards (e.g. an off-by-one on the modulus) would + /// double-walk routes and surface here even if the union check + /// still passed by coincidence. + /// + [Test] + public void ShardRoutes_DifferentShards_AreDisjoint() + { + const int total = 4; + for (var a = 1; a <= total; a++) + { + for (var b = a + 1; b <= total; b++) + { + var shardA = RouteCheckHelpers.ShardRoutes(SampleRoutes, a, total); + var shardB = RouteCheckHelpers.ShardRoutes(SampleRoutes, b, total); + Assert.That(shardA.Intersect(shardB), Is.Empty, + $"shards {a} and {b} of {total} share at least one route"); + } + } + } + + /// + /// Within a single shard, the routes appear in the same relative + /// order as in the input. Round-robin distribution preserves the + /// input's monotone order — the assertion is what lets the CI + /// matrix legs produce diffable failure summaries that line up + /// with the source markdown's ordering. 
+ /// + [Test] + public void ShardRoutes_WithinAShard_PreservesInputOrder() + { + const int total = 4; + for (var i = 1; i <= total; i++) + { + var shard = RouteCheckHelpers.ShardRoutes(SampleRoutes, i, total); + var indices = shard.Select(r => SampleRoutes.IndexOf(r)).ToList(); + Assert.That(indices, Is.Ordered.Ascending, + $"shard {i}/{total} routes are out of order relative to the input"); + } + } + + /// + /// Routes assigned to shard i of total are exactly + /// the routes whose zero-based index in the input satisfies + /// index % total == i - 1. Pins the round-robin contract + /// explicitly so a refactor that swapped the modulus formula + /// (e.g. chunked partitioning) would surface as a membership + /// failure rather than a silent re-ordering. + /// + [Test] + public void ShardRoutes_AssignmentFollowsRoundRobinModulus() + { + const int total = 4; + for (var i = 1; i <= total; i++) + { + var expected = SampleRoutes + .Select((r, idx) => (r, idx)) + .Where(t => t.idx % total == i - 1) + .Select(t => t.r) + .ToList(); + var actual = RouteCheckHelpers.ShardRoutes(SampleRoutes, i, total); + Assert.That(actual, Is.EqualTo(expected), + $"shard {i}/{total} membership does not match the round-robin contract"); + } + } + + /// + /// A null routes list throws + /// with the parameter name pinned per §10a — a future caller that + /// passes null by accident gets a typed failure naming the + /// argument rather than an opaque NRE. + /// + [Test] + public void ShardRoutes_NullRoutes_Throws() + { + var ex = Assert.Throws( + () => RouteCheckHelpers.ShardRoutes(null!, 1, 1)); + Assert.That(ex!.ParamName, Is.EqualTo("routes")); + } + + /// + /// A non-positive total throws + /// with the parameter name pinned. Zero or negative totals are + /// nonsense and must not silently produce an empty shard. 
+ /// + [TestCase(0)] + [TestCase(-1)] + public void ShardRoutes_NonPositiveTotal_Throws(int total) + { + var ex = Assert.Throws( + () => RouteCheckHelpers.ShardRoutes(SampleRoutes, 1, total)); + Assert.That(ex!.ParamName, Is.EqualTo("total")); + } + + /// + /// An out-of-range index throws + /// with the parameter name pinned. The contract is 1-based: + /// 1 <= index <= total. Index 0 (zero-based off-by-one) + /// and index total + 1 (one-past-the-end) both fall outside. + /// + [TestCase(0, 4)] + [TestCase(5, 4)] + [TestCase(-1, 4)] + public void ShardRoutes_IndexOutOfRange_Throws(int index, int total) + { + var ex = Assert.Throws( + () => RouteCheckHelpers.ShardRoutes(SampleRoutes, index, total)); + Assert.That(ex!.ParamName, Is.EqualTo("index")); + } + + /// + /// An empty input yields an empty shard for every (index, total) + /// pair — no exception, no phantom entries. Local invocations on + /// a fresh checkout where the docs tree is empty (e.g. a worktree + /// added before the markdown source landed) still produce a + /// deterministic result. + /// + [Test] + public void ShardRoutes_EmptyInput_ReturnsEmpty() + { + var empty = Array.Empty(); + Assert.That(RouteCheckHelpers.ShardRoutes(empty, 1, 4), Is.Empty); + Assert.That(RouteCheckHelpers.ShardRoutes(empty, 4, 4), Is.Empty); + } + + /// + /// When the input has fewer routes than shards, the surplus shards + /// are empty rather than throwing. CI keeps a 4-shard matrix even + /// for a hypothetically tiny docs tree — surplus shards must + /// no-op rather than fail. 
+ /// + [Test] + public void ShardRoutes_FewerRoutesThanShards_SurplusShardsAreEmpty() + { + var two = new[] { "/x", "/y" }; + var shard1 = RouteCheckHelpers.ShardRoutes(two, 1, 4); + var shard2 = RouteCheckHelpers.ShardRoutes(two, 2, 4); + var shard3 = RouteCheckHelpers.ShardRoutes(two, 3, 4); + var shard4 = RouteCheckHelpers.ShardRoutes(two, 4, 4); + + Assert.That(shard1, Is.EqualTo(new[] { "/x" })); + Assert.That(shard2, Is.EqualTo(new[] { "/y" })); + Assert.That(shard3, Is.Empty); + Assert.That(shard4, Is.Empty); + } + // ─── Helper: scoped temp directory ─────────────────────────────────────── /// From 46a78b305b85fbb5606103dbb9e98a0c5e6a8eec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Otto=20Boly=C3=B3s?= Date: Wed, 3 Jun 2026 07:04:29 +0200 Subject: [PATCH 2/7] perf(docs-tests): shard route walk across the CI matrix Drop the build-test-coverage Ubuntu wall-clock by partitioning the RouteCheckTests Playwright route walk across four parallel matrix shards. The longest single shard becomes the critical-path floor rather than the full sequential walk of every markdown-backed route. - Add ShardRoutes(routes, index, total) to RouteCheckHelpers, implementing round-robin distribution (route at position i lands in shard (i % total) + 1) so per-shard wall-clock stays balanced even when expensive routes cluster in the source tree. - Add ReadShardEnv() reading ROUTE_SHARD_INDEX and ROUTE_SHARD_TOTAL from the process environment, defaulting to (1, 1) on unset or unparseable values so local dotnet test invocations walk every route by default. Clamps a half-set matrix where ROUTE_SHARD_INDEX exceeds ROUTE_SHARD_TOTAL to the largest valid shard so the caller never receives an out-of-range index. - Wire Every_Markdown_Backed_Route_Resolves_Without_A_404 to slice the collected route list through ShardRoutes before walking it. A surplus shard (more shards than routes) legitimately walks zero routes and short-circuits to success. 
- Add shard / shardTotal matrix dimensions to the build-test-coverage workflow. shard is [1, 2, 3, 4] on ubuntu-latest; windows-latest is capped at shard 1 of 4 via exclude rules because the Windows leg filters out Category=E2E and has no shardable work. Each leg env-injects ROUTE_SHARD_INDEX and ROUTE_SHARD_TOTAL into the test runner, names its TRX with the shard suffix to avoid collisions, and uploads its artifacts under a shard-qualified name. --- .github/workflows/dotnet.yml | 60 ++++++++-- .../RouteCheckHelpers.cs | 88 ++++++++++++++ .../RouteCheckHelpersTests.cs | 111 ++++++++++++++++++ .../RouteCheckTests.cs | 30 ++++- 4 files changed, 278 insertions(+), 11 deletions(-) diff --git a/.github/workflows/dotnet.yml b/.github/workflows/dotnet.yml index c8d7e41de..49b33b0f6 100644 --- a/.github/workflows/dotnet.yml +++ b/.github/workflows/dotnet.yml @@ -25,7 +25,7 @@ on: jobs: build-and-test: - name: build-and-test-${{ matrix.os }} + name: build-and-test-${{ matrix.os }}-shard${{ matrix.shard }}of${{ matrix.shardTotal }} # Skip drafts: run only on push-to-master + ready (non-draft) PRs. # The pull_request `types` list above includes `ready_for_review` so # CI fires the moment a draft is flipped to ready. @@ -34,7 +34,20 @@ jobs: strategy: fail-fast: false matrix: + # The ubuntu-latest leg runs Category=E2E, dominated wall-clock + # by the RouteCheckTests Playwright route walk. Sharding the + # walk across N parallel matrix entries on ubuntu drops the + # critical-path wall-clock from N*single-shard down to roughly + # single-shard; each shard env-injects ROUTE_SHARD_INDEX / + # ROUTE_SHARD_TOTAL into the test runner, and RouteCheckTests + # honours those via RouteCheckHelpers.ReadShardEnv + + # RouteCheckHelpers.ShardRoutes. windows-latest excludes + # Category=E2E altogether (no Docker for Linux images) so it + # has no shardable work; the exclude block below caps it at + # shard 1 of 4. 
os: [ubuntu-latest, windows-latest] + shard: [1, 2, 3, 4] + shardTotal: [4] include: # ubuntu-latest runs the full sweep: Docker is pre-installed # on GitHub-hosted Linux runners and Testcontainers can spawn @@ -53,6 +66,18 @@ jobs: # platform dependency and still runs. - os: windows-latest testFilter: "Category!=RequiresDocker&Category!=E2E" + exclude: + # windows-latest never runs Category=E2E (testFilter excludes + # it). Running every windows shard would therefore re-run the + # same unit + integration suite four times for no coverage + # gain. Cap windows at shard 1 of 4 — the non-E2E suite still + # runs once; the E2E sharding is ubuntu-only. + - os: windows-latest + shard: 2 + - os: windows-latest + shard: 3 + - os: windows-latest + shard: 4 steps: - name: Checkout @@ -150,6 +175,16 @@ jobs: - name: Run unit tests with coverage env: TEST_FILTER: ${{ matrix.testFilter }} + # ROUTE_SHARD_INDEX / ROUTE_SHARD_TOTAL drive RouteCheckTests + # via RouteCheckHelpers.ReadShardEnv -> ShardRoutes, splitting + # the ~1,880-route Playwright walk across N parallel matrix + # entries. Unset / total=1 collapses to the no-sharding path, + # which is the local-invocation default. Integers come from + # the static strategy.matrix definition above, never from + # github.event.* untrusted input. 
+ ROUTE_SHARD_INDEX: ${{ matrix.shard }} + ROUTE_SHARD_TOTAL: ${{ matrix.shardTotal }} + TRX_NAME: test-results-${{ matrix.os }}-shard${{ matrix.shard }}of${{ matrix.shardTotal }}.trx run: | if [ -n "$TEST_FILTER" ]; then dotnet test MTConnect.NET.sln \ @@ -157,7 +192,7 @@ jobs: --no-build \ --settings tests/coverlet.runsettings \ --results-directory TestResults \ - --logger "trx;LogFileName=test-results-${{ matrix.os }}.trx" \ + --logger "trx;LogFileName=${TRX_NAME}" \ --filter "$TEST_FILTER" else dotnet test MTConnect.NET.sln \ @@ -165,7 +200,7 @@ jobs: --no-build \ --settings tests/coverlet.runsettings \ --results-directory TestResults \ - --logger "trx;LogFileName=test-results-${{ matrix.os }}.trx" + --logger "trx;LogFileName=${TRX_NAME}" fi shell: bash @@ -174,9 +209,11 @@ jobs: # eager filter that discovers nothing must hard-fail rather than # exit 0. - name: Guard — unit step executed > 0 tests + env: + TRX_NAME: test-results-${{ matrix.os }}-shard${{ matrix.shard }}of${{ matrix.shardTotal }}.trx run: | set -euo pipefail - trx="TestResults/test-results-${{ matrix.os }}.trx" + trx="TestResults/${TRX_NAME}" if [ ! -f "$trx" ]; then echo "::error::Unit TRX not found at $trx — the test step did not produce results." 
exit 1 @@ -244,6 +281,7 @@ jobs: - name: Run integration tests with coverage env: TEST_FILTER: ${{ matrix.testFilter }} + TRX_NAME: test-results-integration-${{ matrix.os }}-shard${{ matrix.shard }}of${{ matrix.shardTotal }}.trx run: | if [ -n "$TEST_FILTER" ]; then dotnet test tests/MTConnect.NET-Integration-Tests/MTConnect.NET-Integration-Tests.csproj \ @@ -252,7 +290,7 @@ jobs: -p:IntegrationCoverage=true \ --settings tests/coverlet.integration.runsettings \ --results-directory TestResults \ - --logger "trx;LogFileName=test-results-integration-${{ matrix.os }}.trx" \ + --logger "trx;LogFileName=${TRX_NAME}" \ --filter "$TEST_FILTER" else dotnet test tests/MTConnect.NET-Integration-Tests/MTConnect.NET-Integration-Tests.csproj \ @@ -261,7 +299,7 @@ jobs: -p:IntegrationCoverage=true \ --settings tests/coverlet.integration.runsettings \ --results-directory TestResults \ - --logger "trx;LogFileName=test-results-integration-${{ matrix.os }}.trx" + --logger "trx;LogFileName=${TRX_NAME}" fi shell: bash @@ -272,9 +310,11 @@ jobs: # the TRX total and fail when it is 0 or the # element is missing. - name: Guard — integration step executed > 0 tests + env: + TRX_NAME: test-results-integration-${{ matrix.os }}-shard${{ matrix.shard }}of${{ matrix.shardTotal }}.trx run: | set -euo pipefail - trx="TestResults/test-results-integration-${{ matrix.os }}.trx" + trx="TestResults/${TRX_NAME}" if [ ! -f "$trx" ]; then echo "::error::Integration TRX not found at $trx — the test step did not produce results." 
exit 1 @@ -308,7 +348,7 @@ jobs: if: always() uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: - name: test-results-${{ matrix.os }} + name: test-results-${{ matrix.os }}-shard${{ matrix.shard }}of${{ matrix.shardTotal }} path: | TestResults/**/*.trx TestResults/**/coverage.cobertura.xml @@ -318,9 +358,11 @@ jobs: - name: Surface coverage summary in job log if: always() + env: + JOB_LABEL: ${{ matrix.os }} shard ${{ matrix.shard }}/${{ matrix.shardTotal }} run: | if [ -f coverage-report/Summary.txt ]; then - echo "### Coverage summary (${{ matrix.os }})" >> "$GITHUB_STEP_SUMMARY" + echo "### Coverage summary (${JOB_LABEL})" >> "$GITHUB_STEP_SUMMARY" echo '```' >> "$GITHUB_STEP_SUMMARY" cat coverage-report/Summary.txt >> "$GITHUB_STEP_SUMMARY" echo '```' >> "$GITHUB_STEP_SUMMARY" diff --git a/tests/MTConnect.NET-Docs-Tests/RouteCheckHelpers.cs b/tests/MTConnect.NET-Docs-Tests/RouteCheckHelpers.cs index 6af6d06f0..67d14f2dc 100644 --- a/tests/MTConnect.NET-Docs-Tests/RouteCheckHelpers.cs +++ b/tests/MTConnect.NET-Docs-Tests/RouteCheckHelpers.cs @@ -182,6 +182,94 @@ public static List CollectRoutes(string docsRoot) return best; } + /// + /// Partition <paramref name="routes"/> into <paramref name="total"/> + /// disjoint subsets and return the one identified by 1-based + /// <paramref name="index"/>. Distribution is round-robin on the + /// input's index — route at position i is assigned to shard + /// (i % total) + 1 — so a CI matrix can run N shards in + /// parallel and the union of every shard equals the input set. + /// + /// + /// Round-robin (rather than contiguous chunks) keeps the per-shard wall + /// clock balanced even when expensive routes cluster in the source + /// markdown tree (e.g. the long-tail of generated reference pages + /// that all live under docs/reference/). Within each shard + /// the input's relative order is preserved so failure summaries + /// align with the markdown source order. 
+ /// + /// The 1-based matches the CI matrix + /// dimension's natural numbering (shard: [1, 2, 3, 4]) so + /// the env-var-to-helper mapping is identity. + /// + /// The full route list to partition. Must not be null. + /// 1-based shard index, in [1, total]. + /// Total number of shards, > 0. + /// routes is null. + /// total is <= 0, or index is outside [1, total]. + public static List ShardRoutes(IReadOnlyList routes, int index, int total) + { + if (routes is null) throw new ArgumentNullException(nameof(routes)); + if (total <= 0) + throw new ArgumentOutOfRangeException(nameof(total), total, + "total must be greater than zero"); + if (index < 1 || index > total) + throw new ArgumentOutOfRangeException(nameof(index), index, + $"index must satisfy 1 <= index <= total (total={total})"); + + var result = new List(routes.Count / total + 1); + for (var i = 0; i < routes.Count; i++) + { + if (i % total == index - 1) result.Add(routes[i]); + } + return result; + } + + /// + /// Default environment variable name carrying the 1-based shard + /// index. Unset (or unparseable) defaults to 1. + /// + public const string ShardIndexEnvVar = "ROUTE_SHARD_INDEX"; + + /// + /// Default environment variable name carrying the shard total. + /// Unset (or unparseable) defaults to 1 (no sharding). + /// + public const string ShardTotalEnvVar = "ROUTE_SHARD_TOTAL"; + + /// + /// Read the shard index and total from the + /// / + /// environment variables, defaulting to (1, 1) when either + /// is unset or unparseable. Local invocations without + /// matrix-injected env vars therefore default to "run every route" + /// — the no-sharding identity path. + /// + /// + /// Optional environment-variable accessor; defaults to + /// so tests + /// can inject deterministic values without mutating process state. + /// + public static (int Index, int Total) ReadShardEnv(Func? 
environment = null) + { + environment ??= Environment.GetEnvironmentVariable; + + var index = 1; + var total = 1; + + if (int.TryParse(environment(ShardIndexEnvVar), out var parsedIndex) && parsedIndex >= 1) + { + index = parsedIndex; + } + if (int.TryParse(environment(ShardTotalEnvVar), out var parsedTotal) && parsedTotal >= 1) + { + total = parsedTotal; + } + if (index > total) index = total; // graceful clamp if env is half-set + + return (index, total); + } + /// /// Ask the OS for a currently-free TCP port on the loopback interface /// by binding ephemerally, reading the assigned port, and releasing. diff --git a/tests/MTConnect.NET-Docs-Tests/RouteCheckHelpersTests.cs b/tests/MTConnect.NET-Docs-Tests/RouteCheckHelpersTests.cs index 8eb235240..548c8bd52 100644 --- a/tests/MTConnect.NET-Docs-Tests/RouteCheckHelpersTests.cs +++ b/tests/MTConnect.NET-Docs-Tests/RouteCheckHelpersTests.cs @@ -784,6 +784,117 @@ public void ShardRoutes_FewerRoutesThanShards_SurplusShardsAreEmpty() Assert.That(shard4, Is.Empty); } + // ─── ReadShardEnv ──────────────────────────────────────────────────────── + + /// + /// Both env vars set to representative values surface as a + /// (index, total) tuple. Pins the CI-matrix-injection + /// contract: ROUTE_SHARD_INDEX=3 + ROUTE_SHARD_TOTAL=4 + /// produces shard 3 of 4 regardless of process-level env state. + /// + [Test] + public void ReadShardEnv_BothSet_ReturnsParsedTuple() + { + string? Env(string key) => key switch + { + RouteCheckHelpers.ShardIndexEnvVar => "3", + RouteCheckHelpers.ShardTotalEnvVar => "4", + _ => null, + }; + var (index, total) = RouteCheckHelpers.ReadShardEnv(Env); + Assert.That(index, Is.EqualTo(3)); + Assert.That(total, Is.EqualTo(4)); + } + + /// + /// Unset env vars produce the no-sharding identity tuple + /// (1, 1). Local dotnet test runs without + /// matrix-injected env vars therefore walk every route on a single + /// shard. 
+ /// + [Test] + public void ReadShardEnv_BothUnset_ReturnsIdentityTuple() + { + string? Env(string key) => null; + var (index, total) = RouteCheckHelpers.ReadShardEnv(Env); + Assert.That(index, Is.EqualTo(1)); + Assert.That(total, Is.EqualTo(1)); + } + + /// + /// Unparseable or non-positive values fall back to the identity tuple — a + /// misconfigured CI matrix that injected an empty string, a + /// non-numeric token, or a value below 1 should walk every route rather than throw + /// during fixture init. + /// + [TestCase("", "")] + [TestCase("not-a-number", "also-not")] + [TestCase("0", "0")] + [TestCase("-1", "-1")] + public void ReadShardEnv_UnparseableValues_FallBackToIdentity(string indexValue, string totalValue) + { + string? Env(string key) => key switch + { + RouteCheckHelpers.ShardIndexEnvVar => indexValue, + RouteCheckHelpers.ShardTotalEnvVar => totalValue, + _ => null, + }; + var (index, total) = RouteCheckHelpers.ReadShardEnv(Env); + Assert.That(index, Is.EqualTo(1)); + Assert.That(total, Is.EqualTo(1)); + } + + /// + /// When ROUTE_SHARD_INDEX exceeds ROUTE_SHARD_TOTAL + /// (an inconsistent matrix, e.g. index=5 with total=4), + /// the helper clamps the index to the total so the caller still + /// receives a valid shard rather than throwing <see cref="ArgumentOutOfRangeException"/> out of + /// <see cref="RouteCheckHelpers.ShardRoutes"/> downstream. This + /// closes the gap between "env vars set" and "env vars consistent." + /// + [Test] + public void ReadShardEnv_IndexAboveTotal_ClampsToTotal() + { + string? Env(string key) => key switch + { + RouteCheckHelpers.ShardIndexEnvVar => "5", + RouteCheckHelpers.ShardTotalEnvVar => "4", + _ => null, + }; + var (index, total) = RouteCheckHelpers.ReadShardEnv(Env); + Assert.That(index, Is.EqualTo(4)); + Assert.That(total, Is.EqualTo(4)); + } + + /// + /// The default <see cref="RouteCheckHelpers.ReadShardEnv"/> + /// overload reads from <see cref="Environment.GetEnvironmentVariable(string)"/>. + /// Smoke-checking that path with both variables set keeps the + /// process-level env-var access wired without forcing a separate + /// integration-tier setup. 
+ /// + [Test] + public void ReadShardEnv_DefaultAccessor_ReadsProcessEnvironment() + { + // Save / restore so later tests see the original values; this + // does not hide the temporary values from a concurrently running test. + var savedIndex = Environment.GetEnvironmentVariable(RouteCheckHelpers.ShardIndexEnvVar); + var savedTotal = Environment.GetEnvironmentVariable(RouteCheckHelpers.ShardTotalEnvVar); + try + { + Environment.SetEnvironmentVariable(RouteCheckHelpers.ShardIndexEnvVar, "2"); + Environment.SetEnvironmentVariable(RouteCheckHelpers.ShardTotalEnvVar, "4"); + var (index, total) = RouteCheckHelpers.ReadShardEnv(); + Assert.That(index, Is.EqualTo(2)); + Assert.That(total, Is.EqualTo(4)); + } + finally + { + Environment.SetEnvironmentVariable(RouteCheckHelpers.ShardIndexEnvVar, savedIndex); + Environment.SetEnvironmentVariable(RouteCheckHelpers.ShardTotalEnvVar, savedTotal); + } + } + // ─── Helper: scoped temp directory ─────────────────────────────────────── /// diff --git a/tests/MTConnect.NET-Docs-Tests/RouteCheckTests.cs b/tests/MTConnect.NET-Docs-Tests/RouteCheckTests.cs index 7b0486cc1..ab5a38142 100644 --- a/tests/MTConnect.NET-Docs-Tests/RouteCheckTests.cs +++ b/tests/MTConnect.NET-Docs-Tests/RouteCheckTests.cs @@ -465,13 +465,39 @@ public void OneTimeSetUp_Rebuilds_Dist_Even_When_Index_Exists() /// workers and reported in a single ordered summary so the diff /// is reviewable in one assertion message. /// + /// + /// Honours the ROUTE_SHARD_INDEX / ROUTE_SHARD_TOTAL + /// environment variables so a CI matrix can parallelise the walk + /// across N legs. Unset env vars collapse to the no-sharding + /// identity path (1 of 1 — every route on a single shard); + /// local dotnet test runs therefore exercise the full route + /// set without any extra ceremony. 
+ /// [Test] public async Task Every_Markdown_Backed_Route_Resolves_Without_A_404() { Assert.That(_browser, Is.Not.Null, "browser was not initialised"); - var routes = RouteCheckHelpers.CollectRoutes(_docsRoot); - Assert.That(routes.Count, Is.GreaterThan(0), "expected at least one markdown-backed route"); + var allRoutes = RouteCheckHelpers.CollectRoutes(_docsRoot); + Assert.That(allRoutes.Count, Is.GreaterThan(0), "expected at least one markdown-backed route"); + + // CI matrix dimension `shard: [1, 2, 3, 4]` injects the two env + // vars below; local runs leave them unset and walk every route. + // The helper returns (1, 1) on unset/unparseable input, so the + // shard slice collapses to the input on the no-sharding path. + var (shardIndex, shardTotal) = RouteCheckHelpers.ReadShardEnv(); + var routes = RouteCheckHelpers.ShardRoutes(allRoutes, shardIndex, shardTotal); + TestContext.Out.WriteLine( + $"Route shard {shardIndex} of {shardTotal}: walking {routes.Count} of {allRoutes.Count} route(s)"); + + // A surplus shard (more shards than routes) legitimately walks + // zero routes; that is success, not failure. + if (routes.Count == 0) + { + TestContext.Out.WriteLine( + $"shard {shardIndex}/{shardTotal} is empty (more shards than routes); nothing to walk"); + return; + } var failures = await WalkRoutesAsync(_browser!, _baseUrl, routes, Concurrency); From ace0f43bbb5decbc6a58cab230a621fdf2e98820 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Otto=20Boly=C3=B3s?= Date: Wed, 3 Jun 2026 07:07:19 +0200 Subject: [PATCH 3/7] fix(docs-tests): use List.IndexOf in ShardRoutes order test IReadOnlyList has no IndexOf extension on the target framework; materialise the sample routes to List before looking up positions in the order-preservation assertion. 
--- tests/MTConnect.NET-Docs-Tests/RouteCheckHelpersTests.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/MTConnect.NET-Docs-Tests/RouteCheckHelpersTests.cs b/tests/MTConnect.NET-Docs-Tests/RouteCheckHelpersTests.cs index 548c8bd52..00c4f63d9 100644 --- a/tests/MTConnect.NET-Docs-Tests/RouteCheckHelpersTests.cs +++ b/tests/MTConnect.NET-Docs-Tests/RouteCheckHelpersTests.cs @@ -670,10 +670,11 @@ public void ShardRoutes_DifferentShards_AreDisjoint() public void ShardRoutes_WithinAShard_PreservesInputOrder() { const int total = 4; + var inputAsList = SampleRoutes.ToList(); for (var i = 1; i <= total; i++) { var shard = RouteCheckHelpers.ShardRoutes(SampleRoutes, i, total); - var indices = shard.Select(r => SampleRoutes.IndexOf(r)).ToList(); + var indices = shard.Select(r => inputAsList.IndexOf(r)).ToList(); Assert.That(indices, Is.Ordered.Ascending, $"shard {i}/{total} routes are out of order relative to the input"); } From 597c4289f8ebcfcfee86d8e9fe4fb88de27305dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Otto=20Boly=C3=B3s?= Date: Wed, 3 Jun 2026 11:46:15 +0200 Subject: [PATCH 4/7] docs(docs-tests): scrub rule reference from shard helper test doc --- tests/MTConnect.NET-Docs-Tests/RouteCheckHelpersTests.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/MTConnect.NET-Docs-Tests/RouteCheckHelpersTests.cs b/tests/MTConnect.NET-Docs-Tests/RouteCheckHelpersTests.cs index 00c4f63d9..7123c4d82 100644 --- a/tests/MTConnect.NET-Docs-Tests/RouteCheckHelpersTests.cs +++ b/tests/MTConnect.NET-Docs-Tests/RouteCheckHelpersTests.cs @@ -707,9 +707,9 @@ public void ShardRoutes_AssignmentFollowsRoundRobinModulus() /// /// A null routes list throws - /// with the parameter name pinned per §10a — a future caller that - /// passes null by accident gets a typed failure naming the - /// argument rather than an opaque NRE. 
+ /// with the parameter name pinned — a future caller that passes + /// null by accident gets a typed failure naming the argument + /// rather than an opaque NRE. /// [Test] public void ShardRoutes_NullRoutes_Throws() From 2c9a50fa79f12c3ac5c68a284452fa6792f3369d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Otto=20Boly=C3=B3s?= Date: Wed, 3 Jun 2026 11:48:10 +0200 Subject: [PATCH 5/7] ci(repo): split unsharded suite from sharded route-check fan-out --- .github/workflows/dotnet.yml | 460 +++++++++++++++++++++-------------- 1 file changed, 272 insertions(+), 188 deletions(-) diff --git a/.github/workflows/dotnet.yml b/.github/workflows/dotnet.yml index 49b33b0f6..4bf2a6ec3 100644 --- a/.github/workflows/dotnet.yml +++ b/.github/workflows/dotnet.yml @@ -1,7 +1,7 @@ name: build-test-coverage -# Restrict the GITHUB_TOKEN to read-only contents access. The job only -# needs to checkout the repo, run dotnet build / test, and upload TRX + +# Restrict the GITHUB_TOKEN to read-only contents access. The jobs only +# need to checkout the repo, run dotnet build / test, and upload TRX + # coverage artifacts; no commit / release / package-write privileges are # required. Defense-in-depth against supply-chain attacks via a # compromised dependency or a test-side RCE. @@ -24,60 +24,46 @@ on: - 'docs/**' jobs: + # ------------------------------------------------------------------ + # Job 1 — unsharded unit + integration sweep on both OS legs. + # Excludes Category=E2E altogether; the route walk is handled in + # job 3 below. windows-latest additionally excludes + # Category=RequiresDocker (hosted Windows runners do NOT pre-install + # Docker for Linux containers — only Windows containers via Hyper-V). + # XsdLoadStrict has no platform dependency and still runs on both + # legs. This job carries no Node / docfx / npm overhead: every step + # it runs is a pure .NET workload, so the docs-build wall-clock is + # paid exactly once across the whole workflow (in job 2 below). 
+ # ------------------------------------------------------------------ build-and-test: - name: build-and-test-${{ matrix.os }}-shard${{ matrix.shard }}of${{ matrix.shardTotal }} + name: build-and-test-${{ matrix.os }} # Skip drafts: run only on push-to-master + ready (non-draft) PRs. - # The pull_request `types` list above includes `ready_for_review` so - # CI fires the moment a draft is flipped to ready. + # The pull_request `types` list above includes `ready_for_review` + # so CI fires the moment a draft is flipped to ready. if: github.event_name == 'push' || github.event.pull_request.draft == false runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: - # The ubuntu-latest leg runs Category=E2E, dominated wall-clock - # by the RouteCheckTests Playwright route walk. Sharding the - # walk across N parallel matrix entries on ubuntu drops the - # critical-path wall-clock from N*single-shard down to roughly - # single-shard; each shard env-injects ROUTE_SHARD_INDEX / - # ROUTE_SHARD_TOTAL into the test runner, and RouteCheckTests - # honours those via RouteCheckHelpers.ReadShardEnv + - # RouteCheckHelpers.ShardRoutes. windows-latest excludes - # Category=E2E altogether (no Docker for Linux images) so it - # has no shardable work; the exclude block below caps it at - # shard 1 of 4. os: [ubuntu-latest, windows-latest] - shard: [1, 2, 3, 4] - shardTotal: [4] include: - # ubuntu-latest runs the full sweep: Docker is pre-installed - # on GitHub-hosted Linux runners and Testcontainers can spawn - # the eclipse-mosquitto + mtconnect/agent images the - # RequiresDocker / E2E suites need; the XSD source-of-truth + # ubuntu-latest runs every non-E2E category: Docker is pre- + # installed on GitHub-hosted Linux runners and Testcontainers + # can spawn the eclipse-mosquitto + mtconnect/agent images + # the RequiresDocker suites need; the XSD source-of-truth # files ship in-repo, so XsdLoadStrict also has every - # prerequisite. Empty filter = no exclusions. 
+ # prerequisite. This leg's testFilter excludes E2E only. - os: ubuntu-latest - testFilter: "" + testFilter: "Category!=E2E" # windows-latest GitHub-hosted runners do NOT pre-install # Docker for Linux containers (only Windows containers via - # Hyper-V), so the RequiresDocker / E2E suites that boot - # Linux images (eclipse-mosquitto:2.0.22, - # mtconnect/agent:latest) cannot run there. Skip both - # categories on the Windows leg; XsdLoadStrict has no - # platform dependency and still runs. + # Hyper-V), so the RequiresDocker suites that boot Linux + # images (eclipse-mosquitto:2.0.22, mtconnect/agent:latest) + # cannot run there. E2E is already excluded for every leg of + # this job. XsdLoadStrict has no platform dependency and + # still runs. - os: windows-latest testFilter: "Category!=RequiresDocker&Category!=E2E" - exclude: - # windows-latest never runs Category=E2E (testFilter excludes - # it). Running every windows shard would therefore re-run the - # same unit + integration suite four times for no coverage - # gain. Cap windows at shard 1 of 4 — the non-E2E suite still - # runs once; the E2E sharding is ubuntu-only. - - os: windows-latest - shard: 2 - - os: windows-latest - shard: 3 - - os: windows-latest - shard: 4 steps: - name: Checkout @@ -90,35 +76,6 @@ jobs: 8.0.x 9.0.x - # MTConnect.NET-Docs-Tests carries a Category=E2E route walk - # whose [OneTimeSetUp] runs `npm ci && npm run build` from - # docs/ when the dist/ artifact is missing, then drives - # `vitepress preview` through Microsoft.Playwright. Only the - # ubuntu-latest leg runs Category=E2E (the windows-latest leg - # excludes it via testFilter), so Node is only required there. 
- - name: Setup Node.js (docs e2e prerequisite) - if: matrix.os == 'ubuntu-latest' - uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 - with: - node-version: '20' - cache: npm - cache-dependency-path: docs/package-lock.json - - # `npm run build` in docs/ invokes the `prebuild` hook - # (`npm run regen`), which calls `bash scripts/generate-api-ref.sh` - # → `docfx metadata` to produce the /api/ reference tree the - # VitePress site consumes. The Docs-site workflow installs docfx - # in its Prepare job; this workflow's E2E leg needs it for the - # same reason. Only the ubuntu-latest leg runs Category=E2E - # (windows-latest excludes it via testFilter), so docfx is only - # required there. Without this, RouteCheckTests.OneTimeSetUp - # fails with "docfx not found on PATH and ~/.dotnet/tools/docfx - # is missing" — see PR #181 run 26831500675 job 79113784371. - - name: Install docfx (docs e2e prerequisite) - if: matrix.os == 'ubuntu-latest' - run: dotnet tool install -g docfx - shell: bash - - name: Restore dotnet tools (ReportGenerator) run: dotnet tool restore @@ -128,34 +85,6 @@ jobs: - name: Build (Debug) run: dotnet build MTConnect.NET.sln --configuration Debug --no-restore - # Cache Playwright browser binaries so the Category=E2E - # [OneTimeSetUp] `playwright install chromium` call is a no-op on - # cache hit. The key hashes the `playwright-core` package.json that - # the Microsoft.Playwright NuGet copies into the build output — - # that file's `version` field directly pins the chromium revision - # the .NET binding drives. Hashing the csproj instead would only - # invalidate by coincidence (a future PR adding an unrelated - # PackageReference would invalidate; an SDK-side change to the - # chromium revision would not). 
The step therefore runs AFTER - # `dotnet build` so the file exists — placement before build - # would force the key off a workspace-internal artefact (hashFiles - # cannot reach the NuGet cache under ~/.nuget), so the - # micro-optimisation of overlapping cache restore with build is - # sacrificed for key correctness. - # Both Linux (~/.cache/ms-playwright) and Windows - # (%USERPROFILE%\AppData\Local\ms-playwright) paths are listed; - # actions/cache restores only the path that exists on the running - # OS, so the multi-path entry is safe for both matrix legs. - - name: Cache Playwright browsers - uses: actions/cache@v4 - with: - path: | - ~/.cache/ms-playwright - ~\AppData\Local\ms-playwright - key: ${{ runner.os }}-playwright-${{ hashFiles('**/tests/MTConnect.NET-Docs-Tests/bin/Debug/net8.0/.playwright/package/package.json') }} - restore-keys: | - ${{ runner.os }}-playwright- - # MTConnect.NET-Integration-Tests is skipped here (its # IsTestProject is false unless IntegrationCoverage=true) and run # in the dedicated step below so it can use its own @@ -163,54 +92,29 @@ jobs: # CLI --settings > RunSettingsFilePath, so a per-project settings # file cannot win against this solution-wide --settings; running # the project as its own step is the only way to give it - # different coverage scoping. See the integration .csproj comment. - # - # The --filter expression is matrix-controlled so each OS leg runs - # the categories it can physically support. ubuntu-latest carries - # an empty filter (the full sweep: RequiresDocker + E2E + - # XsdLoadStrict all included); windows-latest carries - # Category!=RequiresDocker&Category!=E2E (no Linux-image Docker - # on the hosted Windows runner). See the strategy.matrix.include - # block above for the rationale. + # different coverage scoping. See the integration .csproj + # comment. 
- name: Run unit tests with coverage env: TEST_FILTER: ${{ matrix.testFilter }} - # ROUTE_SHARD_INDEX / ROUTE_SHARD_TOTAL drive RouteCheckTests - # via RouteCheckHelpers.ReadShardEnv -> ShardRoutes, splitting - # the ~1,880-route Playwright walk across N parallel matrix - # entries. Unset / total=1 collapses to the no-sharding path, - # which is the local-invocation default. Integers come from - # the static strategy.matrix definition above, never from - # github.event.* untrusted input. - ROUTE_SHARD_INDEX: ${{ matrix.shard }} - ROUTE_SHARD_TOTAL: ${{ matrix.shardTotal }} - TRX_NAME: test-results-${{ matrix.os }}-shard${{ matrix.shard }}of${{ matrix.shardTotal }}.trx + TRX_NAME: test-results-${{ matrix.os }}.trx run: | - if [ -n "$TEST_FILTER" ]; then - dotnet test MTConnect.NET.sln \ - --configuration Debug \ - --no-build \ - --settings tests/coverlet.runsettings \ - --results-directory TestResults \ - --logger "trx;LogFileName=${TRX_NAME}" \ - --filter "$TEST_FILTER" - else - dotnet test MTConnect.NET.sln \ - --configuration Debug \ - --no-build \ - --settings tests/coverlet.runsettings \ - --results-directory TestResults \ - --logger "trx;LogFileName=${TRX_NAME}" - fi + dotnet test MTConnect.NET.sln \ + --configuration Debug \ + --no-build \ + --settings tests/coverlet.runsettings \ + --results-directory TestResults \ + --logger "trx;LogFileName=${TRX_NAME}" \ + --filter "$TEST_FILTER" shell: bash # Same zero-test false-green guard as the integration step, on - # the solution-wide unit run. A broken adapter wiring or an over- - # eager filter that discovers nothing must hard-fail rather than - # exit 0. + # the solution-wide unit run. A broken adapter wiring or an + # over-eager filter that discovers nothing must hard-fail rather + # than exit 0. 
- name: Guard — unit step executed > 0 tests env: - TRX_NAME: test-results-${{ matrix.os }}-shard${{ matrix.shard }}of${{ matrix.shardTotal }}.trx + TRX_NAME: test-results-${{ matrix.os }}.trx run: | set -euo pipefail trx="TestResults/${TRX_NAME}" @@ -234,8 +138,8 @@ jobs: # Integration suite with its own coverlet settings: the shared # configuration plus MTConnect.NET-Common and MTConnect.NET-HTTP # excluded from instrumentation, and MaxCpuCount=1 (this step is - # serial; the shared file no longer pins it). Common+HTTP are the - # dominant-cost assemblies on the timing-critical in-process + # serial; the shared file no longer pins it). Common+HTTP are + # the dominant-cost assemblies on the timing-critical in-process # Agent/HTTP hot path this suite drives; coverlet IL # instrumentation of them on a slow shared runner pushes sample # delivery past the test's wait. Both are covered (faster) by @@ -243,12 +147,12 @@ jobs: # latter (with its AgentRunner support project # MTConnect.NET-Tests-Agents) is a member of MTConnect.NET.sln, # so the "Run unit tests with coverage" step above already - # discovers and runs it under the shared runsettings; it starts a - # real MTConnectAgentBroker + MTConnectHttpServer and streams + # discovers and runs it under the shared runsettings; it starts + # a real MTConnectAgentBroker + MTConnectHttpServer and streams # Probe/Current/Sample through the HTTP clients, exercising the - # Common + HTTP runtime path. Its Cobertura ReportGenerator merges - # with this one, so excluding Common+HTTP here loses no net - # coverage. MTConnect.NET-SHDR stays instrumented (the + # Common + HTTP runtime path. Its Cobertura ReportGenerator + # merges with this one, so excluding Common+HTTP here loses no + # net coverage. MTConnect.NET-SHDR stays instrumented (the # integration suite is its only runtime coverage). 
# # The integration .csproj sets IsTestProject=false unless @@ -257,19 +161,8 @@ jobs: # project WITH -p:IntegrationCoverage=true (so xunit's VSTest # adapter is wired in and tests are discoverable) and only then # runs it with --no-build. Without the explicit build the - # solution-built non-test assembly would discover 0 tests and the - # step would false-green. - # - # As with the unit step, the integration step's --filter is - # matrix-controlled: ubuntu-latest runs the full sweep - # (RequiresDocker + E2E + XsdLoadStrict — the campaign-added - # MqttRelay / JsonMqttProbeEnvelope / HttpProbe / HttpAsset / - # ConfigurationPolymorphicHttpProbe workflow tests all run on a - # real broker via Testcontainers), windows-latest excludes - # RequiresDocker + E2E (no Linux-image Docker on the hosted - # Windows runner). ClientAgentCommunicationTests, - # GenerateDevicesXmlTests and HttpServerLoopbackBindingTests - # carry no category and run on both legs unconditionally. + # solution-built non-test assembly would discover 0 tests and + # the step would false-green. 
- name: Build integration project with IntegrationCoverage flag run: | dotnet build tests/MTConnect.NET-Integration-Tests/MTConnect.NET-Integration-Tests.csproj \ @@ -281,37 +174,27 @@ jobs: - name: Run integration tests with coverage env: TEST_FILTER: ${{ matrix.testFilter }} - TRX_NAME: test-results-integration-${{ matrix.os }}-shard${{ matrix.shard }}of${{ matrix.shardTotal }}.trx + TRX_NAME: test-results-integration-${{ matrix.os }}.trx run: | - if [ -n "$TEST_FILTER" ]; then - dotnet test tests/MTConnect.NET-Integration-Tests/MTConnect.NET-Integration-Tests.csproj \ - --configuration Debug \ - --no-build \ - -p:IntegrationCoverage=true \ - --settings tests/coverlet.integration.runsettings \ - --results-directory TestResults \ - --logger "trx;LogFileName=${TRX_NAME}" \ - --filter "$TEST_FILTER" - else - dotnet test tests/MTConnect.NET-Integration-Tests/MTConnect.NET-Integration-Tests.csproj \ - --configuration Debug \ - --no-build \ - -p:IntegrationCoverage=true \ - --settings tests/coverlet.integration.runsettings \ - --results-directory TestResults \ - --logger "trx;LogFileName=${TRX_NAME}" - fi + dotnet test tests/MTConnect.NET-Integration-Tests/MTConnect.NET-Integration-Tests.csproj \ + --configuration Debug \ + --no-build \ + -p:IntegrationCoverage=true \ + --settings tests/coverlet.integration.runsettings \ + --results-directory TestResults \ + --logger "trx;LogFileName=${TRX_NAME}" \ + --filter "$TEST_FILTER" shell: bash - # Hard-fail the job if the integration step discovered/executed 0 - # tests. A future regression that re-breaks test discovery (e.g. - # the IntegrationCoverage build wiring) would otherwise false- - # green: dotnet test exits 0 when it finds nothing to run. Parse - # the TRX total and fail when it is 0 or the - # element is missing. + # Hard-fail the job if the integration step discovered/executed + # 0 tests. A future regression that re-breaks test discovery + # (e.g. 
the IntegrationCoverage build wiring) would otherwise + # false-green: dotnet test exits 0 when it finds nothing to run. + # Parse the TRX <Counters> total and fail when it is 0 or + # the <Counters> element is missing. - name: Guard — integration step executed > 0 tests env: - TRX_NAME: test-results-integration-${{ matrix.os }}-shard${{ matrix.shard }}of${{ matrix.shardTotal }}.trx + TRX_NAME: test-results-integration-${{ matrix.os }}.trx run: | set -euo pipefail trx="TestResults/${TRX_NAME}" @@ -319,9 +202,6 @@ jobs: echo "::error::Integration TRX not found at $trx — the test step did not produce results." exit 1 fi - # Extract the `total` attribute of the single - # <Counters> element the VSTest TRX logger writes. grep -o keeps this - # dependency-free (no xmllint on the runner image). total="$(grep -o ']*total="[0-9]*"' "$trx" \ | grep -o 'total="[0-9]*"' | grep -o '[0-9]*' | head -n1)" if [ -z "${total:-}" ]; then @@ -348,7 +228,7 @@ jobs: if: always() uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 with: - name: test-results-${{ matrix.os }}-shard${{ matrix.shard }}of${{ matrix.shardTotal }} + name: test-results-${{ matrix.os }} path: | TestResults/**/*.trx TestResults/**/coverage.cobertura.xml @@ -359,7 +239,7 @@ jobs: - name: Surface coverage summary in job log if: always() env: - JOB_LABEL: ${{ matrix.os }} shard ${{ matrix.shard }}/${{ matrix.shardTotal }} + JOB_LABEL: ${{ matrix.os }} run: | if [ -f coverage-report/Summary.txt ]; then echo "### Coverage summary (${JOB_LABEL})" >> "$GITHUB_STEP_SUMMARY" @@ -368,3 +248,207 @@ jobs: echo '```' >> "$GITHUB_STEP_SUMMARY" fi shell: bash + + # ------------------------------------------------------------------ + # Job 2 — docs preparation. Runs once. Produces docs/.vitepress/dist + # by invoking `npm ci && npm run build`, which transitively triggers + # the `prebuild` hook (`npm run regen` → `bash scripts/generate-api- + # ref.sh` → `docfx metadata`). 
The built dist tree is uploaded as a + # workflow artifact so the sharded route-check job below can skip + # the npm + docfx + build wall-clock (~5 min) entirely. The test + # fixture's [OneTimeSetUp] checks for docs/.vitepress/dist/index.html + # and skips the `npm ci && npm run build` bootstrap when the file is + # present, so a shard that downloads the artifact into the right + # path bypasses the bootstrap altogether. + # ------------------------------------------------------------------ + docs-prepare: + name: docs-prepare + if: github.event_name == 'push' || github.event.pull_request.draft == false + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + + - name: Setup .NET 8.0 + 9.0 + uses: actions/setup-dotnet@67a3573c9a986a3f9c594539f4ab511d57bb3ce9 # v4 + with: + dotnet-version: | + 8.0.x + 9.0.x + + - name: Setup Node.js + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: '20' + cache: npm + cache-dependency-path: docs/package-lock.json + + # `npm run build` in docs/ invokes the `prebuild` hook (`npm run + # regen`), which calls `bash scripts/generate-api-ref.sh` → + # `docfx metadata` to produce the /api/ reference tree the + # VitePress site consumes. + - name: Install docfx + run: dotnet tool install -g docfx + shell: bash + + - name: npm ci + working-directory: docs + run: npm ci + + - name: npm run build + working-directory: docs + run: npm run build + + - name: Upload docs dist artifact + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + with: + name: docs-dist + path: docs/.vitepress/dist/ + if-no-files-found: error + retention-days: 1 + + # ------------------------------------------------------------------ + # Job 3 — sharded Category=E2E route walk. 
Four parallel ubuntu + # shards each carry a disjoint subset of the markdown-backed routes + # via ROUTE_SHARD_INDEX / ROUTE_SHARD_TOTAL env vars (consumed by + # RouteCheckHelpers.ReadShardEnv + ShardRoutes). The docs-dist + # artifact from job 2 is downloaded into docs/.vitepress/dist/ so + # the test fixture's [OneTimeSetUp] skips the npm bootstrap (it + # detects the pre-existing index.html). npm ci still runs to + # populate docs/node_modules/ so `npx vitepress preview` finds the + # local vitepress binary; that step is fast on the npm cache hit + # primed by `actions/setup-node`. docfx is NOT installed here — the + # prebuild hook only fires from `npm run build`, which this job + # skips. Each shard runs only `Category=E2E`, so the non-E2E + # categories run exactly once (in job 1) across the workflow. + # ------------------------------------------------------------------ + route-check-e2e: + name: route-check-e2e-shard${{ matrix.shard }}of${{ matrix.shardTotal }} + if: github.event_name == 'push' || github.event.pull_request.draft == false + needs: docs-prepare + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + shard: [1, 2, 3, 4] + shardTotal: [4] + steps: + - name: Checkout + uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4 + + - name: Setup .NET 8.0 + 9.0 + uses: actions/setup-dotnet@67a3573c9a986a3f9c594539f4ab511d57bb3ce9 # v4 + with: + dotnet-version: | + 8.0.x + 9.0.x + + - name: Setup Node.js + uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 + with: + node-version: '20' + cache: npm + cache-dependency-path: docs/package-lock.json + + - name: Restore dotnet tools (ReportGenerator) + run: dotnet tool restore + + - name: Restore solution + run: dotnet restore MTConnect.NET.sln + + - name: Build (Debug) + run: dotnet build MTConnect.NET.sln --configuration Debug --no-restore + + # Populate docs/node_modules/ so `npx vitepress preview` resolves + # the local vitepress binary the test fixture 
spawns. The npm + # cache primed by actions/setup-node above makes this fast on + # warm runners; first-pull-after-bump pays the full cost once + # per shard. Skipping `npm run build` is the actual win — that's + # the ~5 min docfx + vitepress build that job 2 paid once. + - name: npm ci + working-directory: docs + run: npm ci + + - name: Download docs dist artifact + uses: actions/download-artifact@v4 + with: + name: docs-dist + path: docs/.vitepress/dist/ + + # Cache Playwright browser binaries so the Category=E2E + # [OneTimeSetUp] `playwright install chromium` call is a no-op + # on cache hit. The key hashes the `playwright-core` + # package.json that the Microsoft.Playwright NuGet copies into + # the build output — that file's `version` field directly pins + # the chromium revision the .NET binding drives. Hashing the + # csproj instead would only invalidate by coincidence (a future + # PR adding an unrelated PackageReference would invalidate; an + # SDK-side change to the chromium revision would not). The step + # therefore runs AFTER `dotnet build` so the file exists — + # placement before build would force the key off a workspace- + # internal artefact (hashFiles cannot reach the NuGet cache + # under ~/.nuget), so the micro-optimisation of overlapping + # cache restore with build is sacrificed for key correctness. + - name: Cache Playwright browsers + uses: actions/cache@v4 + with: + path: | + ~/.cache/ms-playwright + key: ${{ runner.os }}-playwright-${{ hashFiles('**/tests/MTConnect.NET-Docs-Tests/bin/Debug/net8.0/.playwright/package/package.json') }} + restore-keys: | + ${{ runner.os }}-playwright- + + - name: Run E2E route-check shard + env: + # ROUTE_SHARD_INDEX / ROUTE_SHARD_TOTAL drive + # RouteCheckTests via RouteCheckHelpers.ReadShardEnv -> + # ShardRoutes, splitting the ~1,880-route Playwright walk + # across N parallel matrix entries. 
Integers come from the + # static strategy.matrix definition above, never from + # github.event.* untrusted input. + ROUTE_SHARD_INDEX: ${{ matrix.shard }} + ROUTE_SHARD_TOTAL: ${{ matrix.shardTotal }} + TRX_NAME: test-results-e2e-shard${{ matrix.shard }}of${{ matrix.shardTotal }}.trx + run: | + dotnet test tests/MTConnect.NET-Docs-Tests/MTConnect.NET-Docs-Tests.csproj \ + --configuration Debug \ + --no-build \ + --settings tests/coverlet.runsettings \ + --results-directory TestResults \ + --logger "trx;LogFileName=${TRX_NAME}" \ + --filter "Category=E2E" + shell: bash + + - name: Guard — E2E shard executed > 0 tests + env: + TRX_NAME: test-results-e2e-shard${{ matrix.shard }}of${{ matrix.shardTotal }}.trx + run: | + set -euo pipefail + trx="TestResults/${TRX_NAME}" + if [ ! -f "$trx" ]; then + echo "::error::E2E TRX not found at $trx — the test step did not produce results." + exit 1 + fi + total="$(grep -o ']*total="[0-9]*"' "$trx" \ + | grep -o 'total="[0-9]*"' | grep -o '[0-9]*' | head -n1)" + if [ -z "${total:-}" ]; then + echo "::error::Could not parse from $trx — refusing to false-green." + exit 1 + fi + echo "E2E tests executed: $total" + if [ "$total" -eq 0 ]; then + echo "::error::E2E shard executed 0 tests — discovery is broken (zero-test false-green guard)." 
+ exit 1 + fi + shell: bash + + - name: Upload E2E TRX + coverage artifacts + if: always() + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4 + with: + name: test-results-e2e-shard${{ matrix.shard }}of${{ matrix.shardTotal }} + path: | + TestResults/**/*.trx + TestResults/**/coverage.cobertura.xml + if-no-files-found: warn + retention-days: 14 From 6596449929f79f4cb37cc1aabb8700d20a274de2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Otto=20Boly=C3=B3s?= Date: Wed, 3 Jun 2026 11:48:38 +0200 Subject: [PATCH 6/7] docs(development): document the sharded route-check CI architecture --- docs/development/docs-site.md | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/docs/development/docs-site.md b/docs/development/docs-site.md index 73dd2497e..bc88dc565 100644 --- a/docs/development/docs-site.md +++ b/docs/development/docs-site.md @@ -82,6 +82,25 @@ Failure output names every route that surfaced as a 404 along with which of the The negative companion test (`A_Synthetic_Unmapped_Route_Surfaces_As_A_404`) pins the detector itself: a future Playwright or VitePress upgrade that breaks one of the two 404 signals would still see every real route render fine, but the negative test would go red and surface the regression before a real 404 slipped past. +### Sharded execution + +CI parallelises the route walk across four Ubuntu shards declared in the `route-check-e2e` job of `.github/workflows/dotnet.yml`. Each shard reads two environment variables and walks a disjoint subset of the collected routes: + +| Variable | Meaning | +| --- | --- | +| `ROUTE_SHARD_INDEX` | One-based index of this shard (1 to `ROUTE_SHARD_TOTAL`). | +| `ROUTE_SHARD_TOTAL` | Total shard count. Unset, blank, unparseable, or `1` collapses to the no-sharding path — every route on a single shard. | + +Routes are distributed round-robin by modulus: shard `i` of `N` walks every route whose zero-based position in the sorted collection satisfies `position % N == i - 1`. 
Distribution is therefore deterministic and stable across runs, and shard sizes differ by at most one route for any route count. + +Reproduce a specific shard locally: + +``` +ROUTE_SHARD_INDEX=2 ROUTE_SHARD_TOTAL=4 dotnet test tests/MTConnect.NET-Docs-Tests --filter Category=E2E +``` + +Leaving the variables unset is the no-sharding default — every route on one walk — which is how `dotnet test … --filter Category=E2E` runs without extra ceremony. The CI shape reduces wall-clock for the route walk from around 24 minutes (the unsharded full walk) to around 6 minutes (the longest single shard). The docs build itself runs exactly once, in the `docs-prepare` job; every shard downloads the resulting `docs/.vitepress/dist/` artifact, so the build cost is paid once across the whole workflow (each shard still runs `npm ci` to provide the local `vitepress` binary for the preview server). + ## See also - [Release builder](/development/builder)—the in-tree tool that assembles binary release artifacts (NuGet packages, installers, Docker images). Distinct from the documentation site build covered above. 
From ab34927762c6c9bf22e42ae4c87928d554f8b1b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Otto=20Boly=C3=B3s?= Date: Wed, 3 Jun 2026 11:49:03 +0200 Subject: [PATCH 7/7] test(docs-tests): log the shard's route enumeration before walk --- tests/MTConnect.NET-Docs-Tests/RouteCheckTests.cs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/MTConnect.NET-Docs-Tests/RouteCheckTests.cs b/tests/MTConnect.NET-Docs-Tests/RouteCheckTests.cs index ab5a38142..5e7f842c1 100644 --- a/tests/MTConnect.NET-Docs-Tests/RouteCheckTests.cs +++ b/tests/MTConnect.NET-Docs-Tests/RouteCheckTests.cs @@ -489,6 +489,14 @@ public async Task Every_Markdown_Backed_Route_Resolves_Without_A_404() var routes = RouteCheckHelpers.ShardRoutes(allRoutes, shardIndex, shardTotal); TestContext.Out.WriteLine( $"Route shard {shardIndex} of {shardTotal}: walking {routes.Count} of {allRoutes.Count} route(s)"); + // Enumerate the actual route paths the shard owns so a CI + // failure log is self-describing — a reviewer can see at a + // glance which subset of the markdown tree this shard walked + // without re-deriving the modulus mapping by hand. + foreach (var route in routes) + { + TestContext.Out.WriteLine($" shard {shardIndex}/{shardTotal} route: {route}"); + } // A surplus shard (more shards than routes) legitimately walks // zero routes; that is success, not failure.