From 5b0e352e00e8cc7965de3ef341ac16558e329378 Mon Sep 17 00:00:00 2001 From: Garrett Beatty Date: Fri, 26 Jun 2026 17:42:26 -0400 Subject: [PATCH 01/12] Retry TestCustomAuthorizerApp deployment on transient IAM role propagation failure The TestCustomAuthorizerApp integration test stack deploys many Lambda functions that reference IAM roles created in the same stack. CloudFormation occasionally calls Lambda CreateFunction before the role's trust policy has propagated through IAM, producing "The role defined for the function cannot be assumed by Lambda" and rolling the whole stack back, which fails all 20 tests in the project. Wrap the deploy in a retry loop (3 attempts). Between attempts, delete the rolled-back stack (a ROLLBACK_COMPLETE stack cannot be re-created) and pause briefly to let IAM settle. Surface CloudFormation failed-resource events on each failure for easier debugging. --- .../DeploymentScript.ps1 | 48 +++++++++++++++++-- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/DeploymentScript.ps1 b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/DeploymentScript.ps1 index 4fe9313c0..3a20e55de 100644 --- a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/DeploymentScript.ps1 +++ b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/DeploymentScript.ps1 @@ -59,11 +59,51 @@ try throw "Failed to create the following bucket: $identifier" } dotnet restore - Write-Host "Creating CloudFormation Stack $identifier, Architecture $arch" - dotnet lambda deploy-serverless - if (!$?) + + # Deploy with retries. The stack contains many Lambda functions that each reference + # an IAM role created in the same stack. CloudFormation occasionally calls Lambda + # CreateFunction before the role's trust policy has propagated through IAM, producing + # "The role defined for the function cannot be assumed by Lambda" and rolling the whole + # stack back. 
This is a transient eventual-consistency race, so retry the deployment. + $maxAttempts = 3 + $deploySucceeded = $false + for ($attempt = 1; $attempt -le $maxAttempts; $attempt++) + { + Write-Host "Creating CloudFormation Stack $identifier, Architecture $arch (attempt $attempt of $maxAttempts)" + dotnet lambda deploy-serverless + if ($?) + { + $deploySucceeded = $true + break + } + + Write-Host "Deployment attempt $attempt failed. Fetching CloudFormation stack events for debugging..." + try { + $events = aws cloudformation describe-stack-events --stack-name $identifier --query "StackEvents[?ResourceStatus=='CREATE_FAILED' || ResourceStatus=='UPDATE_FAILED' || ResourceStatus=='DELETE_FAILED']" --output json 2>&1 + if ($events) { + Write-Host "CloudFormation failed events:" + Write-Host $events + } + } + catch { + Write-Host "Could not fetch CloudFormation events: $_" + } + + if ($attempt -lt $maxAttempts) + { + # A failed create leaves the stack in ROLLBACK_COMPLETE, which cannot be updated + # or re-created. Delete it (and wait for the delete to finish) before retrying. + Write-Host "Deleting rolled-back stack $identifier before retrying..." + aws cloudformation delete-stack --stack-name $identifier + aws cloudformation wait stack-delete-complete --stack-name $identifier + # Brief pause to give IAM additional time to settle before the next attempt. + Start-Sleep -Seconds 15 + } + } + + if (!$deploySucceeded) { - throw "Failed to create the following CloudFormation stack: $identifier" + throw "Failed to create the following CloudFormation stack after $maxAttempts attempts: $identifier" } } finally From 5cb59fef5479898394d5902def40d45ce7dbdf38 Mon Sep 17 00:00:00 2001 From: Garrett Beatty Date: Sat, 27 Jun 2026 10:13:33 -0400 Subject: [PATCH 02/12] Speed up integration tests via parallelism and faster stack queries The integration-test phase ran everything serially and dominated CI wall-clock. 
Four independent changes cut that down: - run-integ-tests now runs each *.IntegrationTests.csproj concurrently (buildtools/run-integ-tests-parallel.ps1). Each project deploys its own isolated CloudFormation stack, so they share no state. Replaces the serial MSBuild item-batched Exec. - LambdaHelper.FilterByCloudFormationStackAsync now lists the stack's resources via CloudFormation ListStackResources instead of scanning every Lambda in the account and reading each function's tags one at a time. O(stack size) instead of O(account size), and no longer throttles in a shared test account. - TestServerlessApp and TestCustomAuthorizerApp integ tests share their single deployed-stack fixture across the assembly via IAssemblyFixture (the Xunit.Extensions.AssemblyFixture package) instead of one serial [Collection]. The stack still deploys once, but the test classes now run in parallel. - The durable execution integ suite (45 independent tests, each deploying its own uniquely-named function) no longer forces maxParallelThreads=1; its build helper already guards concurrent publishes with a per-directory file lock. Verified end-to-end against AWS: TestCustomAuthorizerApp deploys its stack once and all 20 tests pass under the parallel AssemblyFixture setup. 
--- .../xunit.runner.json | 4 +- .../IntegrationTests.Helpers/LambdaHelper.cs | 35 +++++++---- .../HealthCheckTests.cs | 4 +- .../HttpApiV1Tests.cs | 4 +- .../HttpApiV2Tests.cs | 4 +- .../IntegrationTestContextFixture.cs | 5 +- ...IntegrationTestContextFixtureCollection.cs | 15 ++--- .../NonStringAuthorizerTests.cs | 4 +- .../RestApiTests.cs | 4 +- .../SimpleHttpApiAuthorizerTests.cs | 4 +- .../SimpleRestApiAuthorizerTests.cs | 4 +- ...ustomAuthorizerApp.IntegrationTests.csproj | 3 + .../ALBIntegrationTestContextFixture.cs | 5 +- .../ComplexCalculator.cs | 4 +- .../CustomResponse.cs | 4 +- .../DynamoDBEventSourceMapping.cs | 4 +- .../FunctionUrlExample.cs | 4 +- .../Greeter.cs | 4 +- .../IntegrationTestContextFixture.cs | 5 +- ...IntegrationTestContextFixtureCollection.cs | 13 ++-- .../S3EventNotification.cs | 4 +- .../SNSEventSubscription.cs | 4 +- .../SQSEventSourceMapping.cs | 4 +- .../ScheduleEventRule.cs | 4 +- .../SimpleCalculator.cs | 4 +- .../TestServerlessApp.IntegrationTests.csproj | 3 + buildtools/build.proj | 11 ++-- buildtools/run-integ-tests-parallel.ps1 | 63 +++++++++++++++++++ 28 files changed, 150 insertions(+), 80 deletions(-) create mode 100644 buildtools/run-integ-tests-parallel.ps1 diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/xunit.runner.json b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/xunit.runner.json index b6de9b357..73179ea81 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/xunit.runner.json +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/xunit.runner.json @@ -1,6 +1,6 @@ { "$schema": "https://xunit.net/schema/current/xunit.runner.schema.json", - "parallelizeTestCollections": false, + "parallelizeTestCollections": true, "parallelizeAssembly": false, - "maxParallelThreads": 1 + "maxParallelThreads": 4 } diff --git a/Libraries/test/IntegrationTests.Helpers/LambdaHelper.cs b/Libraries/test/IntegrationTests.Helpers/LambdaHelper.cs index 
2a6d70f6c..8b6d62fd0 100644 --- a/Libraries/test/IntegrationTests.Helpers/LambdaHelper.cs +++ b/Libraries/test/IntegrationTests.Helpers/LambdaHelper.cs @@ -2,7 +2,10 @@ // SPDX-License-Identifier: Apache-2.0 using System.Collections.Generic; +using System.Linq; using System.Threading.Tasks; +using Amazon.CloudFormation; +using Amazon.CloudFormation.Model; using Amazon.Lambda; using Amazon.Lambda.Model; @@ -10,31 +13,39 @@ namespace IntegrationTests.Helpers { public class LambdaHelper { + // Resource type that SAM AWS::Serverless::Function resources are transformed into in the deployed stack. + private const string LambdaFunctionResourceType = "AWS::Lambda::Function"; + private readonly IAmazonLambda _lambdaClient; + private readonly IAmazonCloudFormation _cloudFormationClient; - public LambdaHelper(IAmazonLambda lambdaClient) + public LambdaHelper(IAmazonLambda lambdaClient, IAmazonCloudFormation cloudFormationClient) { _lambdaClient = lambdaClient; + _cloudFormationClient = cloudFormationClient; } + /// + /// Returns the Lambda functions belonging to a CloudFormation stack by listing the stack's + /// resources directly. This is O(stack size) and independent of how many functions exist in + /// the account, unlike scanning every function and reading its tags one at a time, which is + /// slow and prone to throttling in a shared test account. 
+ /// public async Task> FilterByCloudFormationStackAsync(string stackName) { - const string stackNameKey = "aws:cloudformation:stack-name"; - const string logicalIdKey = "aws:cloudformation:logical-id"; var lambdaFunctions = new List(); - var paginator = _lambdaClient.Paginators.ListFunctions(new ListFunctionsRequest()); + var paginator = _cloudFormationClient.Paginators.ListStackResources( + new ListStackResourcesRequest { StackName = stackName }); - await foreach (var function in paginator.Functions) + await foreach (var resource in paginator.StackResourceSummaries) { - var tags = (await _lambdaClient.ListTagsAsync(new ListTagsRequest { Resource = function.FunctionArn })).Tags; - if (tags.ContainsKey(stackNameKey) && string.Equals(tags[stackNameKey], stackName)) + if (string.Equals(resource.ResourceType, LambdaFunctionResourceType)) { - var lambdaFunction = new LambdaFunction + lambdaFunctions.Add(new LambdaFunction { - LogicalId = tags[logicalIdKey], - Name = function.FunctionName - }; - lambdaFunctions.Add(lambdaFunction); + LogicalId = resource.LogicalResourceId, + Name = resource.PhysicalResourceId + }); } } diff --git a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/HealthCheckTests.cs b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/HealthCheckTests.cs index 2360d7305..2e6891a6c 100644 --- a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/HealthCheckTests.cs +++ b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/HealthCheckTests.cs @@ -1,13 +1,13 @@ using System.Net; using Xunit; +using Xunit.Extensions.AssemblyFixture; namespace TestCustomAuthorizerApp.IntegrationTests; /// /// Tests for the health check endpoint which does not require authorization. 
/// -[Collection("Integration Tests")] -public class HealthCheckTests +public class HealthCheckTests : IAssemblyFixture { private readonly IntegrationTestContextFixture _fixture; diff --git a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/HttpApiV1Tests.cs b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/HttpApiV1Tests.cs index 990f06c28..51eb3a0a0 100644 --- a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/HttpApiV1Tests.cs +++ b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/HttpApiV1Tests.cs @@ -2,6 +2,7 @@ using System.Net.Http.Headers; using Newtonsoft.Json.Linq; using Xunit; +using Xunit.Extensions.AssemblyFixture; namespace TestCustomAuthorizerApp.IntegrationTests; @@ -12,8 +13,7 @@ namespace TestCustomAuthorizerApp.IntegrationTests; /// These tests verify that the source-generated Lambda handler correctly extracts /// values from the authorizer context using [FromCustomAuthorizer] attributes. /// -[Collection("Integration Tests")] -public class HttpApiV1Tests +public class HttpApiV1Tests : IAssemblyFixture { private readonly IntegrationTestContextFixture _fixture; diff --git a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/HttpApiV2Tests.cs b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/HttpApiV2Tests.cs index c63231f35..6bf1df605 100644 --- a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/HttpApiV2Tests.cs +++ b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/HttpApiV2Tests.cs @@ -2,6 +2,7 @@ using System.Net.Http.Headers; using Newtonsoft.Json.Linq; using Xunit; +using Xunit.Extensions.AssemblyFixture; namespace TestCustomAuthorizerApp.IntegrationTests; @@ -12,8 +13,7 @@ namespace TestCustomAuthorizerApp.IntegrationTests; /// These tests verify that the source-generated Lambda handler correctly extracts /// values from the authorizer context using [FromCustomAuthorizer] attributes. 
/// -[Collection("Integration Tests")] -public class HttpApiV2Tests +public class HttpApiV2Tests : IAssemblyFixture { private readonly IntegrationTestContextFixture _fixture; diff --git a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/IntegrationTestContextFixture.cs b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/IntegrationTestContextFixture.cs index 06cba6a17..71c91b782 100644 --- a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/IntegrationTestContextFixture.cs +++ b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/IntegrationTestContextFixture.cs @@ -39,9 +39,10 @@ public class IntegrationTestContextFixture : IAsyncLifetime public IntegrationTestContextFixture() { - _cloudFormationHelper = new CloudFormationHelper(new AmazonCloudFormationClient(Amazon.RegionEndpoint.USWest2)); + var cloudFormationClient = new AmazonCloudFormationClient(Amazon.RegionEndpoint.USWest2); + _cloudFormationHelper = new CloudFormationHelper(cloudFormationClient); _s3Helper = new S3Helper(new AmazonS3Client(Amazon.RegionEndpoint.USWest2)); - LambdaHelper = new LambdaHelper(new AmazonLambdaClient(Amazon.RegionEndpoint.USWest2)); + LambdaHelper = new LambdaHelper(new AmazonLambdaClient(Amazon.RegionEndpoint.USWest2), cloudFormationClient); CloudWatchHelper = new CloudWatchHelper(new AmazonCloudWatchLogsClient(Amazon.RegionEndpoint.USWest2)); HttpClient = new HttpClient(); } diff --git a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/IntegrationTestContextFixtureCollection.cs b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/IntegrationTestContextFixtureCollection.cs index dd673e7b9..db28d5278 100644 --- a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/IntegrationTestContextFixtureCollection.cs +++ b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/IntegrationTestContextFixtureCollection.cs @@ -1,11 +1,4 @@ -using Xunit; - -namespace TestCustomAuthorizerApp.IntegrationTests; - -[CollectionDefinition("Integration Tests", 
DisableParallelization = true)] -public class IntegrationTestContextFixtureCollection : ICollectionFixture -{ - // This class has no code, and is never created. Its purpose is simply - // to be the place to apply [CollectionDefinition] and all the - // ICollectionFixture<> interfaces. -} +// Registers the AssemblyFixture test framework so test classes can share a single +// IntegrationTestContextFixture (one deployed stack) via IAssemblyFixture while still +// running in parallel. Without this attribute IAssemblyFixture is silently ignored. +[assembly: Xunit.TestFramework("Xunit.Extensions.AssemblyFixture.AssemblyFixtureFramework", "Xunit.Extensions.AssemblyFixture")] diff --git a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/NonStringAuthorizerTests.cs b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/NonStringAuthorizerTests.cs index 0d25145dd..c61670586 100644 --- a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/NonStringAuthorizerTests.cs +++ b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/NonStringAuthorizerTests.cs @@ -2,6 +2,7 @@ using System.Net.Http.Headers; using Newtonsoft.Json.Linq; using Xunit; +using Xunit.Extensions.AssemblyFixture; namespace TestCustomAuthorizerApp.IntegrationTests; @@ -13,8 +14,7 @@ namespace TestCustomAuthorizerApp.IntegrationTests; /// These tests exercise the type conversion logic in the .tt template's generated code /// using Convert.ChangeType() to convert authorizer context values to the parameter types. 
/// -[Collection("Integration Tests")] -public class NonStringAuthorizerTests +public class NonStringAuthorizerTests : IAssemblyFixture { private readonly IntegrationTestContextFixture _fixture; diff --git a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/RestApiTests.cs b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/RestApiTests.cs index a226762d7..1bc01991c 100644 --- a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/RestApiTests.cs +++ b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/RestApiTests.cs @@ -2,6 +2,7 @@ using System.Net.Http.Headers; using Newtonsoft.Json.Linq; using Xunit; +using Xunit.Extensions.AssemblyFixture; namespace TestCustomAuthorizerApp.IntegrationTests; @@ -12,8 +13,7 @@ namespace TestCustomAuthorizerApp.IntegrationTests; /// These tests verify that the source-generated Lambda handler correctly extracts /// values from the authorizer context using [FromCustomAuthorizer] attributes. /// -[Collection("Integration Tests")] -public class RestApiTests +public class RestApiTests : IAssemblyFixture { private readonly IntegrationTestContextFixture _fixture; diff --git a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/SimpleHttpApiAuthorizerTests.cs b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/SimpleHttpApiAuthorizerTests.cs index 468caa3b4..34764731e 100644 --- a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/SimpleHttpApiAuthorizerTests.cs +++ b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/SimpleHttpApiAuthorizerTests.cs @@ -2,6 +2,7 @@ using System.Net.Http.Headers; using Newtonsoft.Json.Linq; using Xunit; +using Xunit.Extensions.AssemblyFixture; namespace TestCustomAuthorizerApp.IntegrationTests; @@ -13,8 +14,7 @@ namespace TestCustomAuthorizerApp.IntegrationTests; /// The authorizer under test is /// which returns IAuthorizerResult (AuthorizerResults.Allow()/Deny()) instead of raw API Gateway types. 
/// -[Collection("Integration Tests")] -public class SimpleHttpApiAuthorizerTests +public class SimpleHttpApiAuthorizerTests : IAssemblyFixture { private readonly IntegrationTestContextFixture _fixture; diff --git a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/SimpleRestApiAuthorizerTests.cs b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/SimpleRestApiAuthorizerTests.cs index 3d64ba6c2..8cacdcd7f 100644 --- a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/SimpleRestApiAuthorizerTests.cs +++ b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/SimpleRestApiAuthorizerTests.cs @@ -2,6 +2,7 @@ using System.Net.Http.Headers; using Newtonsoft.Json.Linq; using Xunit; +using Xunit.Extensions.AssemblyFixture; namespace TestCustomAuthorizerApp.IntegrationTests; @@ -14,8 +15,7 @@ namespace TestCustomAuthorizerApp.IntegrationTests; /// which returns IAuthorizerResult (AuthorizerResults.Allow()/Deny()) instead of raw API Gateway types. /// The generated handler serializes this to an IAM policy document with the correct MethodArn. 
/// -[Collection("Integration Tests")] -public class SimpleRestApiAuthorizerTests +public class SimpleRestApiAuthorizerTests : IAssemblyFixture { private readonly IntegrationTestContextFixture _fixture; diff --git a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/TestCustomAuthorizerApp.IntegrationTests.csproj b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/TestCustomAuthorizerApp.IntegrationTests.csproj index bc3018c9c..98fe17c46 100644 --- a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/TestCustomAuthorizerApp.IntegrationTests.csproj +++ b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/TestCustomAuthorizerApp.IntegrationTests.csproj @@ -13,6 +13,9 @@ + + all runtime; build; native; contentfiles; analyzers diff --git a/Libraries/test/TestServerlessApp.ALB.IntegrationTests/ALBIntegrationTestContextFixture.cs b/Libraries/test/TestServerlessApp.ALB.IntegrationTests/ALBIntegrationTestContextFixture.cs index 40c70f7be..84d6d17b3 100644 --- a/Libraries/test/TestServerlessApp.ALB.IntegrationTests/ALBIntegrationTestContextFixture.cs +++ b/Libraries/test/TestServerlessApp.ALB.IntegrationTests/ALBIntegrationTestContextFixture.cs @@ -35,9 +35,10 @@ public class ALBIntegrationTestContextFixture : IAsyncLifetime public ALBIntegrationTestContextFixture() { - _cloudFormationHelper = new CloudFormationHelper(new AmazonCloudFormationClient(Amazon.RegionEndpoint.USWest2)); + var cloudFormationClient = new AmazonCloudFormationClient(Amazon.RegionEndpoint.USWest2); + _cloudFormationHelper = new CloudFormationHelper(cloudFormationClient); _s3Helper = new S3Helper(new AmazonS3Client(Amazon.RegionEndpoint.USWest2)); - LambdaHelper = new LambdaHelper(new AmazonLambdaClient(Amazon.RegionEndpoint.USWest2)); + LambdaHelper = new LambdaHelper(new AmazonLambdaClient(Amazon.RegionEndpoint.USWest2), cloudFormationClient); ELBv2Client = new AmazonElasticLoadBalancingV2Client(Amazon.RegionEndpoint.USWest2); HttpClient = new HttpClient(); } diff --git 
a/Libraries/test/TestServerlessApp.IntegrationTests/ComplexCalculator.cs b/Libraries/test/TestServerlessApp.IntegrationTests/ComplexCalculator.cs index 98075930f..cc1a66091 100644 --- a/Libraries/test/TestServerlessApp.IntegrationTests/ComplexCalculator.cs +++ b/Libraries/test/TestServerlessApp.IntegrationTests/ComplexCalculator.cs @@ -4,11 +4,11 @@ using Newtonsoft.Json; using Newtonsoft.Json.Linq; using Xunit; +using Xunit.Extensions.AssemblyFixture; namespace TestServerlessApp.IntegrationTests { - [Collection("Integration Tests")] - public class ComplexCalculator + public class ComplexCalculator : IAssemblyFixture { private readonly IntegrationTestContextFixture _fixture; diff --git a/Libraries/test/TestServerlessApp.IntegrationTests/CustomResponse.cs b/Libraries/test/TestServerlessApp.IntegrationTests/CustomResponse.cs index 433adac4d..15502af64 100644 --- a/Libraries/test/TestServerlessApp.IntegrationTests/CustomResponse.cs +++ b/Libraries/test/TestServerlessApp.IntegrationTests/CustomResponse.cs @@ -6,11 +6,11 @@ using System.Text; using System.Threading.Tasks; using Xunit; +using Xunit.Extensions.AssemblyFixture; namespace TestServerlessApp.IntegrationTests { - [Collection("Integration Tests")] - public class CustomResponse + public class CustomResponse : IAssemblyFixture { private readonly IntegrationTestContextFixture _fixture; diff --git a/Libraries/test/TestServerlessApp.IntegrationTests/DynamoDBEventSourceMapping.cs b/Libraries/test/TestServerlessApp.IntegrationTests/DynamoDBEventSourceMapping.cs index cef6c76e4..dcfd5d476 100644 --- a/Libraries/test/TestServerlessApp.IntegrationTests/DynamoDBEventSourceMapping.cs +++ b/Libraries/test/TestServerlessApp.IntegrationTests/DynamoDBEventSourceMapping.cs @@ -4,11 +4,11 @@ using System.Linq; using System.Threading.Tasks; using Xunit; +using Xunit.Extensions.AssemblyFixture; namespace TestServerlessApp.IntegrationTests { - [Collection("Integration Tests")] - public class DynamoDBEventSourceMapping + public 
class DynamoDBEventSourceMapping : IAssemblyFixture { private readonly IntegrationTestContextFixture _fixture; diff --git a/Libraries/test/TestServerlessApp.IntegrationTests/FunctionUrlExample.cs b/Libraries/test/TestServerlessApp.IntegrationTests/FunctionUrlExample.cs index b3f97929b..286c7575e 100644 --- a/Libraries/test/TestServerlessApp.IntegrationTests/FunctionUrlExample.cs +++ b/Libraries/test/TestServerlessApp.IntegrationTests/FunctionUrlExample.cs @@ -7,11 +7,11 @@ using System.Threading.Tasks; using Newtonsoft.Json.Linq; using Xunit; +using Xunit.Extensions.AssemblyFixture; namespace TestServerlessApp.IntegrationTests { - [Collection("Integration Tests")] - public class FunctionUrlExample + public class FunctionUrlExample : IAssemblyFixture { private readonly IntegrationTestContextFixture _fixture; diff --git a/Libraries/test/TestServerlessApp.IntegrationTests/Greeter.cs b/Libraries/test/TestServerlessApp.IntegrationTests/Greeter.cs index 395ebfc29..bafde7c97 100644 --- a/Libraries/test/TestServerlessApp.IntegrationTests/Greeter.cs +++ b/Libraries/test/TestServerlessApp.IntegrationTests/Greeter.cs @@ -4,11 +4,11 @@ using System.Net.Http; using System.Threading.Tasks; using Xunit; +using Xunit.Extensions.AssemblyFixture; namespace TestServerlessApp.IntegrationTests { - [Collection("Integration Tests")] - public class Greeter + public class Greeter : IAssemblyFixture { private readonly IntegrationTestContextFixture _fixture; diff --git a/Libraries/test/TestServerlessApp.IntegrationTests/IntegrationTestContextFixture.cs b/Libraries/test/TestServerlessApp.IntegrationTests/IntegrationTestContextFixture.cs index 864b72058..27bdf2f83 100644 --- a/Libraries/test/TestServerlessApp.IntegrationTests/IntegrationTestContextFixture.cs +++ b/Libraries/test/TestServerlessApp.IntegrationTests/IntegrationTestContextFixture.cs @@ -42,10 +42,11 @@ public class IntegrationTestContextFixture : IAsyncLifetime public IntegrationTestContextFixture() { - _cloudFormationHelper = new 
CloudFormationHelper(new AmazonCloudFormationClient(Amazon.RegionEndpoint.USWest2)); + var cloudFormationClient = new AmazonCloudFormationClient(Amazon.RegionEndpoint.USWest2); + _cloudFormationHelper = new CloudFormationHelper(cloudFormationClient); _s3Helper = new S3Helper(new AmazonS3Client(Amazon.RegionEndpoint.USWest2)); S3HelperInstance = _s3Helper; - LambdaHelper = new LambdaHelper(new AmazonLambdaClient(Amazon.RegionEndpoint.USWest2)); + LambdaHelper = new LambdaHelper(new AmazonLambdaClient(Amazon.RegionEndpoint.USWest2), cloudFormationClient); CloudWatchHelper = new CloudWatchHelper(new AmazonCloudWatchLogsClient(Amazon.RegionEndpoint.USWest2)); HttpClient = new HttpClient(); } diff --git a/Libraries/test/TestServerlessApp.IntegrationTests/IntegrationTestContextFixtureCollection.cs b/Libraries/test/TestServerlessApp.IntegrationTests/IntegrationTestContextFixtureCollection.cs index a58ad3967..db28d5278 100644 --- a/Libraries/test/TestServerlessApp.IntegrationTests/IntegrationTestContextFixtureCollection.cs +++ b/Libraries/test/TestServerlessApp.IntegrationTests/IntegrationTestContextFixtureCollection.cs @@ -1,9 +1,4 @@ -using Xunit; - -namespace TestServerlessApp.IntegrationTests -{ - [CollectionDefinition("Integration Tests")] - public class IntegrationTestContextFixtureCollection : ICollectionFixture - { - } -} \ No newline at end of file +// Registers the AssemblyFixture test framework so test classes can share a single +// IntegrationTestContextFixture (one deployed stack) via IAssemblyFixture while still +// running in parallel. Without this attribute IAssemblyFixture is silently ignored. 
+[assembly: Xunit.TestFramework("Xunit.Extensions.AssemblyFixture.AssemblyFixtureFramework", "Xunit.Extensions.AssemblyFixture")] diff --git a/Libraries/test/TestServerlessApp.IntegrationTests/S3EventNotification.cs b/Libraries/test/TestServerlessApp.IntegrationTests/S3EventNotification.cs index d9758ae00..88fd75659 100644 --- a/Libraries/test/TestServerlessApp.IntegrationTests/S3EventNotification.cs +++ b/Libraries/test/TestServerlessApp.IntegrationTests/S3EventNotification.cs @@ -5,11 +5,11 @@ using System.Threading.Tasks; using Amazon.S3; using Xunit; +using Xunit.Extensions.AssemblyFixture; namespace TestServerlessApp.IntegrationTests { - [Collection("Integration Tests")] - public class S3EventNotification + public class S3EventNotification : IAssemblyFixture { private readonly IntegrationTestContextFixture _fixture; diff --git a/Libraries/test/TestServerlessApp.IntegrationTests/SNSEventSubscription.cs b/Libraries/test/TestServerlessApp.IntegrationTests/SNSEventSubscription.cs index 075a5162b..c644a661b 100644 --- a/Libraries/test/TestServerlessApp.IntegrationTests/SNSEventSubscription.cs +++ b/Libraries/test/TestServerlessApp.IntegrationTests/SNSEventSubscription.cs @@ -5,11 +5,11 @@ using System.Threading.Tasks; using Amazon.SimpleNotificationService; using Xunit; +using Xunit.Extensions.AssemblyFixture; namespace TestServerlessApp.IntegrationTests { - [Collection("Integration Tests")] - public class SNSEventSubscription + public class SNSEventSubscription : IAssemblyFixture { private readonly IntegrationTestContextFixture _fixture; diff --git a/Libraries/test/TestServerlessApp.IntegrationTests/SQSEventSourceMapping.cs b/Libraries/test/TestServerlessApp.IntegrationTests/SQSEventSourceMapping.cs index 02f803074..78b886cc6 100644 --- a/Libraries/test/TestServerlessApp.IntegrationTests/SQSEventSourceMapping.cs +++ b/Libraries/test/TestServerlessApp.IntegrationTests/SQSEventSourceMapping.cs @@ -4,11 +4,11 @@ using System.Linq; using System.Threading.Tasks; using 
Xunit; +using Xunit.Extensions.AssemblyFixture; namespace TestServerlessApp.IntegrationTests { - [Collection("Integration Tests")] - public class SQSEventSourceMapping + public class SQSEventSourceMapping : IAssemblyFixture { private readonly IntegrationTestContextFixture _fixture; diff --git a/Libraries/test/TestServerlessApp.IntegrationTests/ScheduleEventRule.cs b/Libraries/test/TestServerlessApp.IntegrationTests/ScheduleEventRule.cs index 19ef9e9da..242e8a605 100644 --- a/Libraries/test/TestServerlessApp.IntegrationTests/ScheduleEventRule.cs +++ b/Libraries/test/TestServerlessApp.IntegrationTests/ScheduleEventRule.cs @@ -6,11 +6,11 @@ using Amazon.CloudWatchEvents; using Amazon.CloudWatchEvents.Model; using Xunit; +using Xunit.Extensions.AssemblyFixture; namespace TestServerlessApp.IntegrationTests { - [Collection("Integration Tests")] - public class ScheduleEventRule + public class ScheduleEventRule : IAssemblyFixture { private readonly IntegrationTestContextFixture _fixture; diff --git a/Libraries/test/TestServerlessApp.IntegrationTests/SimpleCalculator.cs b/Libraries/test/TestServerlessApp.IntegrationTests/SimpleCalculator.cs index 1d0df22ec..80dc2c4a7 100644 --- a/Libraries/test/TestServerlessApp.IntegrationTests/SimpleCalculator.cs +++ b/Libraries/test/TestServerlessApp.IntegrationTests/SimpleCalculator.cs @@ -4,11 +4,11 @@ using System.Net.Http; using System.Threading.Tasks; using Xunit; +using Xunit.Extensions.AssemblyFixture; namespace TestServerlessApp.IntegrationTests { - [Collection("Integration Tests")] - public class SimpleCalculator + public class SimpleCalculator : IAssemblyFixture { private readonly IntegrationTestContextFixture _fixture; diff --git a/Libraries/test/TestServerlessApp.IntegrationTests/TestServerlessApp.IntegrationTests.csproj b/Libraries/test/TestServerlessApp.IntegrationTests/TestServerlessApp.IntegrationTests.csproj index 36bce5b4c..7597a5f29 100644 --- 
a/Libraries/test/TestServerlessApp.IntegrationTests/TestServerlessApp.IntegrationTests.csproj +++ b/Libraries/test/TestServerlessApp.IntegrationTests/TestServerlessApp.IntegrationTests.csproj @@ -14,6 +14,9 @@ + + all runtime; build; native; contentfiles; analyzers diff --git a/buildtools/build.proj b/buildtools/build.proj index c2ea686e7..550e72703 100644 --- a/buildtools/build.proj +++ b/buildtools/build.proj @@ -218,12 +218,11 @@ - - - - - + + diff --git a/buildtools/run-integ-tests-parallel.ps1 b/buildtools/run-integ-tests-parallel.ps1 new file mode 100644 index 000000000..3614d9420 --- /dev/null +++ b/buildtools/run-integ-tests-parallel.ps1 @@ -0,0 +1,63 @@ +#!/usr/bin/env pwsh +# Runs every integration test project concurrently. Each *.IntegrationTests.csproj deploys its own +# isolated CloudFormation stack (unique name + S3 bucket), so the projects have no shared state and +# can run in parallel. Running them serially was the dominant cost of the CI integ-test phase. +# +# Output from each project is captured and printed as a labeled block after that project finishes, +# so the interleaved logs of parallel runs stay readable. The script exits non-zero if any project +# fails, listing which ones. + +param( + [string]$Configuration = "Release", + # Directory to search for integration test projects (defaults to the Libraries/test tree). + [string]$TestRoot = (Join-Path $PSScriptRoot ".." "Libraries" "test"), + # Upper bound on how many projects run at once. + [int]$ThrottleLimit = 5 +) + +$ErrorActionPreference = 'Stop' + +$projects = Get-ChildItem -Path $TestRoot -Recurse -Filter "*.IntegrationTests.csproj" | + Select-Object -ExpandProperty FullName | + Sort-Object + +if (-not $projects) +{ + Write-Host "No integration test projects found under '$TestRoot'." 
+ exit 0 +} + +Write-Host "Running $($projects.Count) integration test project(s) in parallel (throttle limit $ThrottleLimit):" +$projects | ForEach-Object { Write-Host " - $_" } + +$results = $projects | ForEach-Object -ThrottleLimit $ThrottleLimit -Parallel { + $project = $_ + $name = [System.IO.Path]::GetFileNameWithoutExtension($project) + # 2>&1 folds stderr into the captured stream so warnings/errors appear in the labeled block. + $output = dotnet test -c $using:Configuration --logger "console;verbosity=detailed" $project 2>&1 | Out-String + [PSCustomObject]@{ + Name = $name + Project = $project + ExitCode = $LASTEXITCODE + Output = $output + } +} + +foreach ($result in $results) +{ + Write-Host "" + Write-Host "==================== $($result.Name) (exit $($result.ExitCode)) ====================" + Write-Host $result.Output +} + +$failed = $results | Where-Object { $_.ExitCode -ne 0 } +if ($failed) +{ + Write-Host "" + Write-Host "The following integration test project(s) failed:" + $failed | ForEach-Object { Write-Host " - $($_.Name)" } + exit 1 +} + +Write-Host "" +Write-Host "All integration test projects passed." From 3255e972186bb66898603f2938f178e485d06a5c Mon Sep 17 00:00:00 2001 From: Garrett Beatty Date: Sat, 27 Jun 2026 10:34:52 -0400 Subject: [PATCH 03/12] Stream integ-test output live instead of buffering until project completion The parallel runner captured each project's output with Out-String and only printed it after the project finished, so nothing appeared during the long integration-test run. Stream each line to the host as it arrives, prefixed with the project name so the interleaved parallel logs stay attributable. Failed projects still get their full output reprinted as one clean block at the end. 
--- buildtools/run-integ-tests-parallel.ps1 | 27 ++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/buildtools/run-integ-tests-parallel.ps1 b/buildtools/run-integ-tests-parallel.ps1 index 3614d9420..b87b88295 100644 --- a/buildtools/run-integ-tests-parallel.ps1 +++ b/buildtools/run-integ-tests-parallel.ps1 @@ -3,9 +3,10 @@ # isolated CloudFormation stack (unique name + S3 bucket), so the projects have no shared state and # can run in parallel. Running them serially was the dominant cost of the CI integ-test phase. # -# Output from each project is captured and printed as a labeled block after that project finishes, -# so the interleaved logs of parallel runs stay readable. The script exits non-zero if any project -# fails, listing which ones. +# Each project's output is streamed live, prefixed with the project name so the interleaved logs of +# the parallel runs stay attributable. Failed projects also get their full output reprinted as one +# clean block at the end (un-interleaved) for easier diagnosis. The script exits non-zero if any +# project fails, listing which ones. param( [string]$Configuration = "Release", @@ -33,24 +34,32 @@ $projects | ForEach-Object { Write-Host " - $_" } $results = $projects | ForEach-Object -ThrottleLimit $ThrottleLimit -Parallel { $project = $_ $name = [System.IO.Path]::GetFileNameWithoutExtension($project) - # 2>&1 folds stderr into the captured stream so warnings/errors appear in the labeled block. - $output = dotnet test -c $using:Configuration --logger "console;verbosity=detailed" $project 2>&1 | Out-String + $lines = [System.Collections.Generic.List[string]]::new() + # 2>&1 folds stderr into the stream. Each line is emitted to the host as it arrives, prefixed + # with the project name, so progress is visible during the (long) run instead of only at the end. 
+ dotnet test -c $using:Configuration --logger "console;verbosity=detailed" $project 2>&1 | + ForEach-Object { + $line = $_.ToString() + $lines.Add($line) + Write-Host "[$name] $line" + } [PSCustomObject]@{ Name = $name Project = $project ExitCode = $LASTEXITCODE - Output = $output + Output = ($lines -join [System.Environment]::NewLine) } } -foreach ($result in $results) +# Reprint each failed project's output as one clean, un-interleaved block for easier diagnosis. +$failed = $results | Where-Object { $_.ExitCode -ne 0 } +foreach ($result in $failed) { Write-Host "" - Write-Host "==================== $($result.Name) (exit $($result.ExitCode)) ====================" + Write-Host "==================== FAILED: $($result.Name) (exit $($result.ExitCode)) ====================" Write-Host $result.Output } -$failed = $results | Where-Object { $_.ExitCode -ne 0 } if ($failed) { Write-Host "" From b0d874537b5efc03dd931d61877a3daf80484266 Mon Sep 17 00:00:00 2001 From: Garrett Beatty Date: Sat, 27 Jun 2026 11:02:20 -0400 Subject: [PATCH 04/12] Fix flaky durable suspend tests: await termination signal instead of fixed delay MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit InvokeOperationTests.InvokeAsync_FreshExecution_CheckpointsStartAndSuspends failed intermittently on net10.0 (e.g. CI run on PR #2451). The suspend-path tests kicked off an operation, slept a fixed 10-50ms, then asserted tm.IsTerminated. Under CI thread-pool pressure the suspend signal didn't always fire within that window, so the assert raced and failed. TerminationManager already exposes TerminationTask, a Task that completes exactly when Terminate() fires. Replace the fixed delays with a shared tm.WaitForTerminationAsync() helper that awaits that task (bounded by a 10s timeout so a genuine non-suspension still fails fast at the assert). Applied to all 13 suspend-gated sites across 5 test files. 
Verified: full suite passes on net8.0 and net10.0, and the previously-flaky test passed 25/25 consecutive runs on net10.0. Also faster — tests resume the instant suspension fires instead of always sleeping. --- .../CallbackOperationTests.cs | 4 ++-- .../ChildContextOperationTests.cs | 2 +- .../DurableContextTests.cs | 4 ++-- .../InvokeOperationTests.cs | 12 +++++----- .../TerminationTestHelpers.cs | 23 +++++++++++++++++++ .../WaitForConditionOperationTests.cs | 4 ++-- 6 files changed, 36 insertions(+), 13 deletions(-) create mode 100644 Libraries/test/Amazon.Lambda.DurableExecution.Tests/TerminationTestHelpers.cs diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/CallbackOperationTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/CallbackOperationTests.cs index 99a1342fe..a826c0c10 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/CallbackOperationTests.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/CallbackOperationTests.cs @@ -163,7 +163,7 @@ public async Task GetResultAsync_FreshExecution_SuspendsExecution() // GetResultAsync should signal termination and return a never-completing task. 
var resultTask = callback.GetResultAsync(); - await Task.Delay(10); + await tm.WaitForTerminationAsync(); Assert.True(tm.IsTerminated); Assert.False(resultTask.IsCompleted); @@ -193,7 +193,7 @@ public async Task ReplayStarted_DoesNotReFlushStart_AndSuspendsOnGetResult() Assert.False(tm.IsTerminated); var resultTask = callback.GetResultAsync(); - await Task.Delay(10); + await tm.WaitForTerminationAsync(); Assert.True(tm.IsTerminated); Assert.False(resultTask.IsCompleted); diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/ChildContextOperationTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/ChildContextOperationTests.cs index b8b2e952b..1782fe933 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/ChildContextOperationTests.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/ChildContextOperationTests.cs @@ -322,7 +322,7 @@ public async Task RunInChildContextAsync_ChildSuspendsOnWait_TerminatesWithWaitS }, name: "phase"); - await Task.Delay(50); + await tm.WaitForTerminationAsync(); Assert.True(tm.IsTerminated); Assert.False(task.IsCompleted); diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/DurableContextTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/DurableContextTests.cs index 74fcfe3fb..76d7b748a 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/DurableContextTests.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/DurableContextTests.cs @@ -379,7 +379,7 @@ public async Task WaitAsync_NewExecution_SignalsTermination() var waitTask = context.WaitAsync(TimeSpan.FromSeconds(30), name: "my_wait"); // Give it a moment to execute - await Task.Delay(10); + await tm.WaitForTerminationAsync(); Assert.True(tm.IsTerminated); Assert.False(waitTask.IsCompleted); @@ -433,7 +433,7 @@ public async Task WaitAsync_StartedButNotExpired_ResuspendsWithoutNewCheckpoint( var waitTask = context.WaitAsync(TimeSpan.FromSeconds(30), name: "pending_wait"); - await Task.Delay(10); + 
await tm.WaitForTerminationAsync(); Assert.True(tm.IsTerminated); Assert.False(waitTask.IsCompleted); diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/InvokeOperationTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/InvokeOperationTests.cs index daf933cb5..c69568ca9 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/InvokeOperationTests.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/InvokeOperationTests.cs @@ -74,7 +74,7 @@ public async Task InvokeAsync_PreservesUnqualifiedArn_AndPassesItThrough() payload: "x", name: "noversion"); - await Task.Delay(20); + await tm.WaitForTerminationAsync(); Assert.True(tm.IsTerminated); Assert.False(task.IsCompleted); @@ -100,7 +100,7 @@ public async Task InvokeAsync_FreshExecution_CheckpointsStartAndSuspends() // Service-side suspend mechanics: TerminationManager fires before the // user task completes; the task itself never resolves on the fresh path. - await Task.Delay(20); + await tm.WaitForTerminationAsync(); Assert.True(tm.IsTerminated); Assert.False(task.IsCompleted); @@ -130,7 +130,7 @@ public async Task InvokeAsync_FreshExecution_NoTenantId_OmitsTenantId() var task = context.InvokeAsync(FunctionArn, "payload", name: "no_tenant"); - await Task.Delay(20); + await tm.WaitForTerminationAsync(); Assert.True(tm.IsTerminated); Assert.False(task.IsCompleted); @@ -154,7 +154,7 @@ public async Task InvokeAsync_FreshExecution_StartIsSyncFlushed() var (context, recorder, tm, _) = CreateContext(); var task = context.InvokeAsync(FunctionArn, "x", name: "sync_flush"); - await Task.Delay(20); + await tm.WaitForTerminationAsync(); Assert.True(tm.IsTerminated); Assert.False(task.IsCompleted); @@ -350,7 +350,7 @@ public async Task InvokeAsync_ReplayStarted_ResuspendsWithoutRecheckpoint() }); var task = context.InvokeAsync(FunctionArn, "x", name: "still_running"); - await Task.Delay(20); + await tm.WaitForTerminationAsync(); Assert.True(tm.IsTerminated); Assert.False(task.IsCompleted); 
@@ -377,7 +377,7 @@ public async Task InvokeAsync_ReplayPending_ResuspendsWithoutRecheckpoint() }); var task = context.InvokeAsync(FunctionArn, "x", name: "pending"); - await Task.Delay(20); + await tm.WaitForTerminationAsync(); Assert.True(tm.IsTerminated); Assert.False(task.IsCompleted); diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/TerminationTestHelpers.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/TerminationTestHelpers.cs new file mode 100644 index 000000000..dcbf39553 --- /dev/null +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/TerminationTestHelpers.cs @@ -0,0 +1,23 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +using Amazon.Lambda.DurableExecution.Internal; + +namespace Amazon.Lambda.DurableExecution.Tests; + +/// +/// Shared helpers for tests that exercise the suspend/terminate path. +/// +internal static class TerminationTestHelpers +{ + /// + /// Waits for the suspend signal deterministically instead of a fixed delay, which races under + /// CI thread-pool pressure (the original Task.Delay assumed the suspend happened within a + /// fixed window, which isn't guaranteed). The suspend path trips + /// TerminationManager.Terminate, which completes + /// TerminationManager.TerminationTask. Bounded by a timeout so a genuine + /// non-suspension fails fast at the following assert instead of hanging.
+ /// + public static Task WaitForTerminationAsync(this TerminationManager tm, int timeoutSeconds = 10) => + Task.WhenAny(tm.TerminationTask, Task.Delay(TimeSpan.FromSeconds(timeoutSeconds))); +} diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/WaitForConditionOperationTests.cs b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/WaitForConditionOperationTests.cs index 50f7557b3..81eeb1c54 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.Tests/WaitForConditionOperationTests.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.Tests/WaitForConditionOperationTests.cs @@ -89,7 +89,7 @@ public async Task FreshExecution_StrategyContinues_EmitsRetryAndSuspends() }, name: "poll"); - await Task.Delay(50); + await tm.WaitForTerminationAsync(); Assert.True(tm.IsTerminated); Assert.False(task.IsCompleted); @@ -818,7 +818,7 @@ public async Task FreshExecution_FlushesStartBeforeSuspending() }, name: "poll"); - await Task.Delay(50); + await tm.WaitForTerminationAsync(); Assert.True(tm.IsTerminated); Assert.False(task.IsCompleted); From 8f85e9c7eb3eb09e09458f74cd90b216529fa559 Mon Sep 17 00:00:00 2001 From: Garrett Beatty Date: Sat, 27 Jun 2026 12:46:11 -0400 Subject: [PATCH 05/12] Fix IAM throttling and build thrash in parallel durable integ suite Running the durable integ suite in parallel (maxParallelThreads=4) surfaced two contention problems that this addresses. IAM 'Rate exceeded': each test created and deleted its own IAM role, so several deployments hammered IAM's (global, single-bucket, low-rate) mutating APIs at once. Replace per-test roles with a single shared execution role (durable-integ-shared-execution-role) created at most once per account and reused across tests and runs, gated so concurrent deployments don't race. It carries the union of permissions every scenario needs (invoke durable-integ-* functions + send durable-execution callbacks); no test depends on a role lacking a permission, so one role is safe. 
Dispose no longer deletes roles. Clients also use adaptive retry as a backstop. Build thrash/timeouts: each test published its function separately and wiped obj/bin first, so the shared source projects (Amazon.Lambda.DurableExecution etc.) were rebuilt per-test, and concurrent publishes thrashed MSBuild into 'dotnet timed out'. Publish all functions once, up front, in a single MSBuild pass via a generated traversal project (Restore;Publish, BuildInParallel) that builds the shared projects once and publishes each function to its own bin/publish; tests then only zip that output. Verified: 51/51 functions publish in one ~16s pass with 0 errors, and the suite no longer throttles IAM. --- .../DurableFunctionDeployment.cs | 422 +++++++++--------- 1 file changed, 207 insertions(+), 215 deletions(-) diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/DurableFunctionDeployment.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/DurableFunctionDeployment.cs index cd7448171..58e0ca71e 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/DurableFunctionDeployment.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/DurableFunctionDeployment.cs @@ -9,6 +9,7 @@ using Amazon.IdentityManagement.Model; using Amazon.Lambda; using Amazon.Lambda.Model; +using Amazon.Runtime; using Xunit.Abstractions; namespace Amazon.Lambda.DurableExecution.IntegrationTests; @@ -40,20 +41,36 @@ internal sealed class DurableFunctionDeployment : IAsyncDisposable private readonly IAmazonIdentityManagementService _iamClient; private readonly string _functionName; - private readonly string _roleName; private string? _roleArn; private string? _functionArn; private bool _functionCreated; - private readonly List _inlinePolicyNames = new(); // Optional paired "external system" Lambda — a plain (non-durable) function // that the workflow's submitter invokes. 
Models a real-world callback flow // where an out-of-band service resolves the durable execution. private readonly string _externalFunctionName; - private readonly string _externalRoleName; private string? _externalRoleArn; private bool _externalFunctionCreated; + // A single IAM role shared by every test function in the suite. Creating and deleting a role + // per deployment burst-throttled IAM ("Rate exceeded") once the suite started running in + // parallel — IAM is global, single-bucketed, and throttles mutating calls aggressively. The + // shared role is created at most once per account (reused across runs) and gated so concurrent + // deployments don't race to create it. No test depends on a role *lacking* a permission, so a + // single union-of-permissions role is safe; it is scoped to invoking durable-integ-* functions. + private const string SharedRoleName = "durable-integ-shared-execution-role"; + private static readonly SemaphoreSlim SharedRoleGate = new(1, 1); + private static string? _sharedRoleArn; + + // Publishing is done ONCE for all test functions, up front, instead of per-test. The test + // functions all reference the same source projects (Amazon.Lambda.DurableExecution etc.); + // publishing each function separately (and the old code wiped obj/bin first, forcing a cold + // build every time) rebuilt those shared projects dozens of times, and doing it concurrently + // thrashed MSBuild. A single up-front pass builds the shared projects once and the publishes + // run incrementally; each test then just zips its already-published output. + private static readonly SemaphoreSlim PrePublishGate = new(1, 1); + private static bool _prePublished; + public string FunctionName => _functionName; public string? ExternalFunctionName => _externalFunctionCreated ? 
_externalFunctionName : null; @@ -77,16 +94,34 @@ internal sealed class DurableFunctionDeployment : IAsyncDisposable private DurableFunctionDeployment(ITestOutputHelper output, string suffix) { _output = output; - _lambdaClient = new AmazonLambdaClient(DeploymentRegion); - _iamClient = new AmazonIdentityManagementServiceClient(DeploymentRegion); + // The integration suite runs its test classes in parallel, so several deployments hit IAM + // (CreateRole/AttachRolePolicy/DeleteRole) at once. IAM has low request-rate limits and + // returns "Rate exceeded" under that contention. Adaptive retry adds client-side rate + // limiting and backs off on throttling, and a higher retry count rides out longer throttle + // windows, instead of failing the test on the first throttle. + _lambdaClient = new AmazonLambdaClient(BuildClientConfig<AmazonLambdaConfig>()); + _iamClient = new AmazonIdentityManagementServiceClient(BuildClientConfig<AmazonIdentityManagementServiceConfig>()); // Truncate the GUID (not the suffix) so CloudTrail entries stay readable. // Keep the GUID short enough that the total stays well under 40 chars even for long suffixes. static string ShortId() => Guid.NewGuid().ToString("N")[..Math.Min(8, 32)]; _functionName = $"durable-integ-{suffix}-{ShortId()}"; - _roleName = $"durable-integ-{suffix}-{ShortId()}"; _externalFunctionName = $"durable-integ-{suffix}-ext-{ShortId()}"; - _externalRoleName = $"durable-integ-{suffix}-ext-{ShortId()}"; + } + + /// + /// Builds a client config tuned to survive throttling when the suite runs in parallel: + /// adaptive retry (client-side rate limiting + backoff on throttle) and a generous retry count. + /// + private static TConfig BuildClientConfig<TConfig>() where TConfig : ClientConfig, new() + { + var config = new TConfig + { + RegionEndpoint = DeploymentRegion, + RetryMode = RequestRetryMode.Adaptive, + MaxErrorRetry = 10 + }; + return config; + } // The optional `handler` defaults to `bootstrap` (executable model).
Pass an @@ -192,143 +227,125 @@ public static async Task CreateAsync( } """; - private async Task InitializeAsync( - string testFunctionDir, - string? externalFunctionDir, - IDictionary? environment, - IReadOnlyList? invokeAllowedFunctionArns, - bool enableTenancy, - string? handler) + // Inline policy granting the permissions every durable-integ scenario needs: invoking any + // durable-integ-* function (covers chained invoke and external-function invoke) and sending + // durable-execution callbacks. Resource is scoped to the suite's function name prefix. + private const string SharedInlinePolicyName = "DurableIntegSharedPermissions"; + private const string SharedInlinePolicyDocument = """ { - // 1. Create the workflow's IAM role. - _output.WriteLine($"Creating IAM role: {_roleName}"); - var createRoleResponse = await _iamClient.CreateRoleAsync(new CreateRoleRequest - { - RoleName = _roleName, - AssumeRolePolicyDocument = LambdaAssumeRolePolicy - }); - _roleArn = createRoleResponse.Role.Arn; + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Action": "lambda:InvokeFunction", + "Resource": [ + "arn:aws:lambda:*:*:function:durable-integ-*", + "arn:aws:lambda:*:*:function:durable-integ-*:*" + ] + }, + { + "Effect": "Allow", + "Action": [ + "lambda:SendDurableExecutionCallbackSuccess", + "lambda:SendDurableExecutionCallbackFailure" + ], + "Resource": "*" + } + ] + } + """; - await _iamClient.AttachRolePolicyAsync(new AttachRolePolicyRequest - { - RoleName = _roleName, - PolicyArn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" - }); + /// + /// Returns the ARN of the shared execution role, creating it once per account if absent. + /// Gated by a semaphore + memoized ARN so concurrent deployments don't race or re-create it. + /// In steady state (role already exists from a prior run) this is a single GetRole call for the + /// entire suite, which is what keeps the parallel run under IAM's mutating-call rate limits. 
+ /// + private async Task GetOrCreateSharedRoleAsync() + { + if (_sharedRoleArn != null) + return _sharedRoleArn; - await _iamClient.AttachRolePolicyAsync(new AttachRolePolicyRequest + await SharedRoleGate.WaitAsync(); + try { - RoleName = _roleName, - PolicyArn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicDurableExecutionRolePolicy" - }); + if (_sharedRoleArn != null) + return _sharedRoleArn; - // 2. (optional) Create the external function's IAM role up front so its - // sts:AssumeRole and lambda:SendDurableExecutionCallbackSuccess - // permissions propagate alongside the workflow role's permissions - // (single 10-second sleep covers both). - if (externalFunctionDir != null) - { - _output.WriteLine($"Creating external IAM role: {_externalRoleName}"); - var extRoleResponse = await _iamClient.CreateRoleAsync(new CreateRoleRequest + try { - RoleName = _externalRoleName, + var existing = await _iamClient.GetRoleAsync(new GetRoleRequest { RoleName = SharedRoleName }); + _output.WriteLine($"Reusing shared IAM role: {SharedRoleName}"); + _sharedRoleArn = existing.Role.Arn; + return _sharedRoleArn; + } + catch (NoSuchEntityException) + { + // Falls through to create it. + } + + _output.WriteLine($"Creating shared IAM role: {SharedRoleName}"); + var created = await _iamClient.CreateRoleAsync(new CreateRoleRequest + { + RoleName = SharedRoleName, AssumeRolePolicyDocument = LambdaAssumeRolePolicy }); - _externalRoleArn = extRoleResponse.Role.Arn; await _iamClient.AttachRolePolicyAsync(new AttachRolePolicyRequest { - RoleName = _externalRoleName, + RoleName = SharedRoleName, PolicyArn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" }); - - // Inline policy lets the external function call the durable callback API. - // Resource "*" because we don't yet know the workflow's ARN at this point — - // the external function only resolves callbacks belonging to executions the - // workflow created, so the blast radius is bounded by the role's lifetime. 
- await _iamClient.PutRolePolicyAsync(new PutRolePolicyRequest + await _iamClient.AttachRolePolicyAsync(new AttachRolePolicyRequest { - RoleName = _externalRoleName, - PolicyName = "SendDurableExecutionCallback", - PolicyDocument = """ - { - "Version": "2012-10-17", - "Statement": [{ - "Effect": "Allow", - "Action": [ - "lambda:SendDurableExecutionCallbackSuccess", - "lambda:SendDurableExecutionCallbackFailure" - ], - "Resource": "*" - }] - } - """ + RoleName = SharedRoleName, + PolicyArn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicDurableExecutionRolePolicy" }); - - // Workflow function will Invoke the external function — grant via inline policy. - // Scoped to the external function name we just minted. await _iamClient.PutRolePolicyAsync(new PutRolePolicyRequest { - RoleName = _roleName, - PolicyName = "InvokeExternalFunction", - PolicyDocument = $$""" - { - "Version": "2012-10-17", - "Statement": [{ - "Effect": "Allow", - "Action": "lambda:InvokeFunction", - "Resource": "arn:aws:lambda:*:*:function:{{_externalFunctionName}}" - }] - } - """ + RoleName = SharedRoleName, + PolicyName = SharedInlinePolicyName, + PolicyDocument = SharedInlinePolicyDocument }); - _inlinePolicyNames.Add("InvokeExternalFunction"); - } - // Grant cross-Lambda invoke when the parent of a chained-invoke scenario - // needs to call out to a downstream function. The durable execution service - // is the one that actually drives the chained invocation in production — - // attaching this directly to the parent's role keeps the parent role - // capable of being used in non-durable contexts (e.g. for diagnostic - // direct invokes from the test harness). - if (invokeAllowedFunctionArns != null && invokeAllowedFunctionArns.Count > 0) + // Wait for IAM propagation so the first function create doesn't hit + // "The role defined for the function cannot be assumed by Lambda". 
+ await Task.Delay(TimeSpan.FromSeconds(10)); + + _sharedRoleArn = created.Role.Arn; + return _sharedRoleArn; + } + finally { - // Allow both the unqualified ARN and any qualifier (alias/version/$LATEST). - var resources = new List(invokeAllowedFunctionArns.Count * 2); - foreach (var arn in invokeAllowedFunctionArns) - { - resources.Add(arn); - resources.Add(arn + ":*"); - } - var resourceJson = "[" + string.Join(",", resources.Select(r => $"\"{r}\"")) + "]"; - var policyDoc = $$""" - { - "Version": "2012-10-17", - "Statement": [{ - "Effect": "Allow", - "Action": ["lambda:InvokeFunction"], - "Resource": {{resourceJson}} - }] - } - """; - const string PolicyName = "AllowChainedInvoke"; - await _iamClient.PutRolePolicyAsync(new PutRolePolicyRequest - { - RoleName = _roleName, - PolicyName = PolicyName, - PolicyDocument = policyDoc - }); - _inlinePolicyNames.Add(PolicyName); + SharedRoleGate.Release(); } + } - // Wait for IAM propagation. - await Task.Delay(TimeSpan.FromSeconds(10)); + private async Task InitializeAsync( + string testFunctionDir, + string? externalFunctionDir, + IDictionary? environment, + IReadOnlyList? invokeAllowedFunctionArns, + bool enableTenancy, + string? handler) + { + // 1. Acquire the shared IAM role (created once per account, reused across tests and runs). + // Both the workflow function and any paired external function run under this single role, + // which carries the union of permissions every scenario needs. The external function's + // callback-send permission and the workflow's invoke permission are all baked into the + // shared role, so per-test PutRolePolicy calls are no longer needed. + _roleArn = await GetOrCreateSharedRoleAsync(); + if (externalFunctionDir != null) + { + _externalRoleArn = _roleArn; + } - // 3. Build + zip the workflow function package. + // 2. Build + zip the workflow function package. 
_output.WriteLine($"Building and zipping function package from {testFunctionDir}..."); var zipBytes = await BuildAndZipAsync(testFunctionDir); _output.WriteLine($"Package built: {zipBytes.Length} bytes"); - // 4. (optional) Build + deploy the external function. Done before the workflow + // 3. (optional) Build + deploy the external function. Done before the workflow // Lambda so the workflow function's environment can reference the external // function name (which is already known from the ctor). if (externalFunctionDir != null) @@ -355,7 +372,7 @@ await _lambdaClient.CreateFunctionAsync(new CreateFunctionRequest await WaitForFunctionActive(_externalFunctionName); } - // 5. Create the workflow Lambda. + // 4. Create the workflow Lambda. _output.WriteLine($"Creating Lambda function: {_functionName}"); var createFunctionRequest = new CreateFunctionRequest { @@ -604,42 +621,18 @@ private async Task WaitForFunctionActive(string functionName) } /// - /// Publishes a test function (framework-dependent, linux-x64) and zips the publish - /// output for upload as a managed-runtime Lambda package. The zip contains the native - /// bootstrap shim that the dotnet managed runtime execs (executable model). + /// Returns the zipped, published package for a test function. The actual publishing happens + /// once for all functions (see EnsureAllFunctionsPublishedAsync); this just zips + /// the already-published output. The zip contains the native bootstrap shim that the + /// dotnet managed runtime execs (executable model). /// private async Task BuildAndZipAsync(string testFunctionDir) { - // `dotnet test` spins up one testhost per TargetFramework (net8.0 + net10.0) and - // runs them concurrently. Both testhosts invoke the same test classes, which means - // two processes can race on the same TestFunctions// source dir — wiping bin/ - // and obj/ under each other's feet. Symptom: MSB3030 "Could not copy bootstrap.dll" - // because one process deleted obj/ while the other was mid-publish.
Serialize the - // per-source-dir build with a cross-process file lock so different test functions - // can still build in parallel. (A Mutex would have thread-affinity issues across - // awaits; an exclusive FileStream avoids that.) Lock file goes under temp — keeping - // it out of the source tree avoids polluting git status across worktrees. - var lockKey = Convert.ToHexString(System.Security.Cryptography.SHA256.HashData( - Encoding.UTF8.GetBytes(testFunctionDir.ToLowerInvariant())))[..16]; - var lockPath = Path.Combine(Path.GetTempPath(), $"durable-integ-build-{lockKey}.lock"); - using var lockHandle = await AcquireExclusiveFileLockAsync(lockPath, TimeSpan.FromMinutes(10)); + await EnsureAllFunctionsPublishedAsync(); var publishDir = Path.Combine(testFunctionDir, "bin", "publish"); - if (Directory.Exists(publishDir)) Directory.Delete(publishDir, true); - - // MSBuild's up-to-date check leaves stale .Up2Date markers under obj/ that - // make `dotnet publish` skip the copy-to-output step on a second run after - // we've wiped bin/publish/. Result: empty publish dir → empty zip package. - // Nuking obj/ guarantees a real publish each time the helper is invoked. - // Cheap (each test function is small). - var objDir = Path.Combine(testFunctionDir, "obj"); - if (Directory.Exists(objDir)) Directory.Delete(objDir, true); - var binDir = Path.Combine(testFunctionDir, "bin"); - if (Directory.Exists(binDir)) Directory.Delete(binDir, true); - - await RunProcess("dotnet", - $"publish -c Release -r linux-x64 --self-contained false -o \"{publishDir}\"", - testFunctionDir); + if (!Directory.Exists(publishDir)) + throw new DirectoryNotFoundException($"Expected published output at '{publishDir}' but it does not exist."); // Zip the publish output. On Linux (CI) ZipFile preserves the bootstrap exec bit; // on Windows the managed runtime tolerates the missing bit. 
@@ -650,21 +643,75 @@ await RunProcess("dotnet", return await File.ReadAllBytesAsync(zipPath); } - private static async Task AcquireExclusiveFileLockAsync(string lockPath, TimeSpan timeout) + /// + /// Publishes every test function once, up front, in a SINGLE MSBuild invocation. Runs at most + /// once per test run (gated + memoized). A generated traversal project references all function + /// projects and publishes them with one dotnet build, so MSBuild builds the shared + /// dependency projects once and publishes the functions in parallel within that one process — + /// avoiding both the per-project CLI/MSBuild startup cost of N separate dotnet publish + /// calls and the cross-process thrash that those caused when the suite ran in parallel. Each + /// function still lands in its own bin/publish; tests then only zip that output. + /// + private async Task EnsureAllFunctionsPublishedAsync() { - var deadline = DateTime.UtcNow + timeout; - while (true) + if (_prePublished) + return; + + await PrePublishGate.WaitAsync(); + try { + if (_prePublished) + return; + + var testFunctionsRoot = Path.GetFullPath( + Path.Combine(AppContext.BaseDirectory, "..", "..", "..", "TestFunctions")); + var projects = Directory.GetFiles(testFunctionsRoot, "*.csproj", SearchOption.AllDirectories) + .OrderBy(p => p, StringComparer.Ordinal) + .ToList(); + + _output.WriteLine($"Pre-publishing {projects.Count} test function(s) in a single MSBuild pass..."); + + // Generate a traversal project that publishes every function project to its own + // bin/publish (PublishDir relative to each project). BuildInParallel lets MSBuild fan + // the publishes out across nodes once the shared dependency projects are built. 
+ var itemsXml = string.Concat(projects.Select(p => + $" \n")); + var traversalProject = $""" + + + {itemsXml} + + + + + + """; + + var traversalPath = Path.Combine(Path.GetTempPath(), $"durable-integ-publish-all-{Guid.NewGuid():N}.proj"); + await File.WriteAllTextAsync(traversalPath, traversalProject); try { - return new FileStream(lockPath, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.None); + // -maxcpucount lets MSBuild use multiple nodes for the parallel publishes. + await RunProcess("dotnet", + $"build \"{traversalPath}\" -t:PublishAll -maxcpucount", + testFunctionsRoot); } - catch (IOException) + finally { - if (DateTime.UtcNow >= deadline) - throw new TimeoutException($"Timed out waiting for build lock '{lockPath}' after {timeout.TotalSeconds:F0}s"); - await Task.Delay(TimeSpan.FromMilliseconds(500)); + try { File.Delete(traversalPath); } catch { /* best effort */ } } + + _prePublished = true; + } + finally + { + PrePublishGate.Release(); } } @@ -742,64 +789,9 @@ public async ValueTask DisposeAsync() catch (Exception ex) { _output.WriteLine($"Cleanup error (external function): {ex.Message}"); } } - if (_roleArn != null) - { - // Detach each policy independently — if one detach fails (e.g., the - // policy was never attached because init bailed out early) we still - // want to attempt the others and the final DeleteRole. - await TryDetachManaged(_roleName, "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"); - await TryDetachManaged(_roleName, "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicDurableExecutionRolePolicy"); - - // Inline policies must be deleted (not detached) before DeleteRole succeeds. 
- foreach (var inline in _inlinePolicyNames) - { - await TryDeleteInline(_roleName, inline); - } - - try - { - await _iamClient.DeleteRoleAsync(new DeleteRoleRequest { RoleName = _roleName }); - } - catch (Exception ex) { _output.WriteLine($"Cleanup error (IAM DeleteRole): {ex.Message}"); } - } - - if (_externalRoleArn != null) - { - await TryDetachManaged(_externalRoleName, "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"); - await TryDeleteInline(_externalRoleName, "SendDurableExecutionCallback"); - try - { - await _iamClient.DeleteRoleAsync(new DeleteRoleRequest { RoleName = _externalRoleName }); - } - catch (Exception ex) { _output.WriteLine($"Cleanup error (IAM DeleteRole external): {ex.Message}"); } - } - - async Task TryDetachManaged(string roleName, string policyArn) - { - try - { - await _iamClient.DetachRolePolicyAsync(new DetachRolePolicyRequest - { - RoleName = roleName, - PolicyArn = policyArn - }); - } - catch (Exception ex) { _output.WriteLine($"Cleanup error (IAM Detach {policyArn}): {ex.Message}"); } - } - - async Task TryDeleteInline(string roleName, string policyName) - { - try - { - await _iamClient.DeleteRolePolicyAsync(new DeleteRolePolicyRequest - { - RoleName = roleName, - PolicyName = policyName - }); - } - catch (NoSuchEntityException) { /* policy was never attached — fine */ } - catch (Exception ex) { _output.WriteLine($"Cleanup error (IAM DeleteInline {policyName}): {ex.Message}"); } - } + // The shared IAM role is intentionally NOT deleted here — it is reused by every test and + // across runs. Deleting/recreating it per test is exactly what burst-throttled IAM. It is a + // single stable role (durable-integ-shared-execution-role) that the test account retains. 
} public static string FindTestFunctionDir(string functionDirName) From c2dee0c8eac8ba5103c4944f3ef573748f64d7c0 Mon Sep 17 00:00:00 2001 From: Garrett Beatty Date: Sat, 27 Jun 2026 13:00:59 -0400 Subject: [PATCH 06/12] Fix flaky FileDescriptorLogStream test: capture exact written bytes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit MaxSizeProducesOneLogFrame intermittently failed with 'Expected: 16, Actual: 15' on the header length. The header ends with an 8-byte big-endian microsecond timestamp; roughly 1 in 256 timestamps ends in a 0x00 byte. TestFileStream's Write captured bytes via TrimTrailingNullBytes(buffer).Take(count), which stripped that legitimate trailing zero, yielding a 15-byte header. Capture exactly buffer[offset, offset + count) instead — that is precisely what the production code wrote, and it no longer depends on the timestamp's value. --- .../TestHelpers/TestFileStream.cs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/Libraries/test/Amazon.Lambda.RuntimeSupport.Tests/Amazon.Lambda.RuntimeSupport.UnitTests/TestHelpers/TestFileStream.cs b/Libraries/test/Amazon.Lambda.RuntimeSupport.Tests/Amazon.Lambda.RuntimeSupport.UnitTests/TestHelpers/TestFileStream.cs index 0b3d3b8fc..bd9b0946e 100644 --- a/Libraries/test/Amazon.Lambda.RuntimeSupport.Tests/Amazon.Lambda.RuntimeSupport.UnitTests/TestHelpers/TestFileStream.cs +++ b/Libraries/test/Amazon.Lambda.RuntimeSupport.Tests/Amazon.Lambda.RuntimeSupport.UnitTests/TestHelpers/TestFileStream.cs @@ -1,7 +1,5 @@ using System; -using System.Collections.Generic; using System.IO; -using System.Linq; namespace Amazon.Lambda.RuntimeSupport.UnitTests.TestHelpers { @@ -19,13 +17,14 @@ public TestFileStream(Action writeAction) public override void Write(byte[] buffer, int offset, int count) { - WriteAction(TrimTrailingNullBytes(buffer).Take(count).ToArray(), offset, count); - } - - private static IEnumerable 
TrimTrailingNullBytes(IEnumerable buffer) - { - // Trim trailing null bytes to make testing assertions easier - return buffer.Reverse().SkipWhile(x => x == 0).Reverse(); + // Capture exactly the bytes that were written: [offset, offset + count). + // The previous implementation trimmed trailing null bytes from the buffer, which was + // flaky: a log header ends with an 8-byte big-endian microsecond timestamp, and roughly + // 1 in 256 timestamps ends in a 0x00 byte. Trimming that legitimate byte made the + // captured header 15 bytes instead of 16 and failed MaxSizeProducesOneLogFrame. + var written = new byte[count]; + Array.Copy(buffer, offset, written, 0, count); + WriteAction(written, offset, count); } } } From ee9ad183b53ef6f2f0c19ec4c7b8ee16ad913e5f Mon Sep 17 00:00:00 2001 From: Garrett Beatty Date: Sat, 27 Jun 2026 13:19:14 -0400 Subject: [PATCH 07/12] Throttle Lambda control-plane calls in parallel durable integ suite MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After the shared-role fix removed IAM throttling, the throttling moved to Lambda's account-wide control-plane APIs: with maxParallelThreads=4, the combination of CreateFunction + DeleteFunction + WaitForFunctionActive polling GetFunctionConfiguration exceeded Lambda's limits, surfacing as 'Rate exceeded' and adaptive retry's 'capacity could not be obtained'. Two compounding causes addressed: - Each deployment built its own AWS clients, so adaptive retry's per-client rate limiter couldn't coordinate across the parallel deployments — N clients each assumed they had capacity and fired at once. Make the Lambda and IAM clients static/shared so adaptive retry actually paces the whole suite. - Cap concurrent Lambda control-plane calls (create/delete/get-configuration) with a suite-wide semaphore (limit 2) via a RunControlPlaneAsync helper, so the 4 parallel test threads don't collectively exceed Lambda's control-plane rate. 
Data-plane calls (Invoke, durable-execution reads) are not gated. Also slow the WaitForFunctionActive poll from 2s to 3s to cut its call rate. --- .../DurableFunctionDeployment.cs | 64 +++++++++++++------ 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/DurableFunctionDeployment.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/DurableFunctionDeployment.cs index 58e0ca71e..d3935e86d 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/DurableFunctionDeployment.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/DurableFunctionDeployment.cs @@ -37,8 +37,22 @@ internal sealed class DurableFunctionDeployment : IAsyncDisposable private static readonly RegionEndpoint DeploymentRegion = RegionEndpoint.USEast1; private readonly ITestOutputHelper _output; - private readonly IAmazonLambda _lambdaClient; - private readonly IAmazonIdentityManagementService _iamClient; + + // Clients are shared (static) across all deployments. Each deployment used to construct its + // own clients, which defeated adaptive retry: its congestion controller / rate limiter is + // per-client, so N independent clients each believed they had capacity, all fired at once, and + // collectively blew Lambda's account-wide control-plane limits ("capacity could not be obtained + // ... insufficient capacity"). A single shared client per service lets adaptive retry actually + // coordinate backoff across the parallel deployments. + private static readonly IAmazonLambda _lambdaClient = new AmazonLambdaClient(BuildClientConfig()); + private static readonly IAmazonIdentityManagementService _iamClient = + new AmazonIdentityManagementServiceClient(BuildClientConfig()); + + // Lambda control-plane calls (CreateFunction/DeleteFunction/GetFunctionConfiguration) are + // account-rate-limited and are the next bottleneck once IAM is no longer per-test. 
Cap how many + // run concurrently across the whole suite so the parallel deployments don't collectively exceed + // Lambda's limits; data-plane calls (Invoke, durable-execution reads) are not gated. + private static readonly SemaphoreSlim LambdaControlPlaneGate = new(2, 2); private readonly string _functionName; private string? _roleArn; @@ -94,13 +108,6 @@ internal sealed class DurableFunctionDeployment : IAsyncDisposable private DurableFunctionDeployment(ITestOutputHelper output, string suffix) { _output = output; - // The integration suite runs its test classes in parallel, so several deployments hit IAM - // (CreateRole/AttachRolePolicy/DeleteRole) at once. IAM has low request-rate limits and - // returns "Rate exceeded" under that contention. Adaptive retry adds client-side rate - // limiting and backs off on throttling, and a higher retry count rides out longer throttle - // windows, instead of failing the test on the first throttle. - _lambdaClient = new AmazonLambdaClient(BuildClientConfig()); - _iamClient = new AmazonIdentityManagementServiceClient(BuildClientConfig()); // Truncate the GUID (not the suffix) so CloudTrail entries stay readable. // Keep the GUID short enough that the total stays well under 40 chars even for long suffixes. @@ -354,7 +361,7 @@ private async Task InitializeAsync( var extZipBytes = await BuildAndZipAsync(externalFunctionDir); _output.WriteLine($"Creating external Lambda function: {_externalFunctionName}"); - await _lambdaClient.CreateFunctionAsync(new CreateFunctionRequest + await RunControlPlaneAsync(() => _lambdaClient.CreateFunctionAsync(new CreateFunctionRequest { FunctionName = _externalFunctionName, Runtime = ManagedRuntime, @@ -365,7 +372,7 @@ await _lambdaClient.CreateFunctionAsync(new CreateFunctionRequest MemorySize = 256, LoggingConfig = new LoggingConfig { LogFormat = LogFormat.JSON } // No DurableConfig — this is a plain function. 
- }); + })); _externalFunctionCreated = true; _output.WriteLine("Waiting for external function to become Active..."); @@ -423,7 +430,7 @@ await _lambdaClient.CreateFunctionAsync(new CreateFunctionRequest }; } - var createFunctionResponse = await _lambdaClient.CreateFunctionAsync(createFunctionRequest); + var createFunctionResponse = await RunControlPlaneAsync(() => _lambdaClient.CreateFunctionAsync(createFunctionRequest)); _functionCreated = true; _functionArn = createFunctionResponse.FunctionArn; @@ -604,22 +611,43 @@ private void DumpEvents(GetDurableExecutionHistoryResponse history) private async Task WaitForFunctionActive(string functionName) { - for (int i = 0; i < 60; i++) + for (int i = 0; i < 40; i++) { try { - var config = await _lambdaClient.GetFunctionConfigurationAsync( - new GetFunctionConfigurationRequest { FunctionName = functionName }); + // Gate each poll call: GetFunctionConfiguration is control-plane and rate-limited, + // and all parallel deployments poll at once. + var config = await RunControlPlaneAsync(() => _lambdaClient.GetFunctionConfigurationAsync( + new GetFunctionConfigurationRequest { FunctionName = functionName })); if (config.State == State.Active) return; if (config.State == State.Failed) throw new Exception($"Function '{functionName}' creation failed: {config.StateReasonCode} - {config.StateReason}"); } catch (ResourceNotFoundException) { } - await Task.Delay(TimeSpan.FromSeconds(2)); + await Task.Delay(TimeSpan.FromSeconds(3)); } throw new TimeoutException($"Function '{functionName}' did not become Active within 120 seconds"); } + /// + /// Runs a Lambda control-plane operation under <see cref="LambdaControlPlaneGate"/> so the + /// suite's parallel deployments don't collectively exceed Lambda's account-wide + /// control-plane request rate. Adaptive retry on the shared client handles brief throttles; + /// this gate keeps the offered load low enough that retry doesn't exhaust its capacity.
+ /// + private static async Task RunControlPlaneAsync(Func> operation) + { + await LambdaControlPlaneGate.WaitAsync(); + try + { + return await operation(); + } + finally + { + LambdaControlPlaneGate.Release(); + } + } + /// /// Returns the zipped, published package for a test function. The actual publishing happens /// once for all functions (see <see cref="EnsureAllFunctionsPublishedAsync"/>); this just zips @@ -774,7 +802,7 @@ public async ValueTask DisposeAsync() try { _output.WriteLine($"Deleting function: {_functionName}"); - await _lambdaClient.DeleteFunctionAsync(new DeleteFunctionRequest { FunctionName = _functionName }); + await RunControlPlaneAsync(() => _lambdaClient.DeleteFunctionAsync(new DeleteFunctionRequest { FunctionName = _functionName })); } catch (Exception ex) { _output.WriteLine($"Cleanup error (function): {ex.Message}"); } } @@ -784,7 +812,7 @@ public async ValueTask DisposeAsync() try { _output.WriteLine($"Deleting external function: {_externalFunctionName}"); - await _lambdaClient.DeleteFunctionAsync(new DeleteFunctionRequest { FunctionName = _externalFunctionName }); + await RunControlPlaneAsync(() => _lambdaClient.DeleteFunctionAsync(new DeleteFunctionRequest { FunctionName = _externalFunctionName })); } catch (Exception ex) { _output.WriteLine($"Cleanup error (external function): {ex.Message}"); } } From bbfff3d78c8cac7cd100291b9f7af5b1237db27f Mon Sep 17 00:00:00 2001 From: Garrett Beatty Date: Sat, 27 Jun 2026 13:54:10 -0400 Subject: [PATCH 08/12] Fix parallel file races in integ tests: tool install and function.zip The CI run no longer throttles IAM or Lambda control-plane (those fixes held), but parallelism surfaced two shared-file races: - 'Cannot create .../dotnet/tools/.store/amazon.lambda.tools/6.0.6 because a file or directory with the same name already exists': the three *.IntegrationTests projects run DeploymentScript.ps1 in parallel and each ran 'dotnet tool install -g Amazon.Lambda.Tools', colliding on the global tool store.
Make the install idempotent: skip if already installed, and tolerate the concurrent-install race (already-installed/already-exists treated as success) with a short retry. - 'function.zip ... being used by another process' (ApproverFunction): a test function that is the external function for more than one test was zipped to a shared bin/function.zip by multiple parallel tests at once. Zip to a unique temp path per call instead; the read-only published output is still shared. --- .../DurableFunctionDeployment.cs | 23 +++++++++++----- .../DeploymentScript.ps1 | 27 ++++++++++++++++++- .../DeploymentScript.ps1 | 27 ++++++++++++++++++- .../DeploymentScript.ps1 | 27 ++++++++++++++++++- 4 files changed, 94 insertions(+), 10 deletions(-) diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/DurableFunctionDeployment.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/DurableFunctionDeployment.cs index d3935e86d..3116c1686 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/DurableFunctionDeployment.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/DurableFunctionDeployment.cs @@ -662,13 +662,22 @@ private async Task BuildAndZipAsync(string testFunctionDir) if (!Directory.Exists(publishDir)) throw new DirectoryNotFoundException($"Expected published output at '{publishDir}' but it does not exist."); - // Zip the publish output. On Linux (CI) ZipFile preserves the bootstrap exec bit; - // on Windows the managed runtime tolerates the missing bit. - var zipPath = Path.Combine(testFunctionDir, "bin", "function.zip"); - if (File.Exists(zipPath)) File.Delete(zipPath); - ZipFile.CreateFromDirectory(publishDir, zipPath, CompressionLevel.Optimal, includeBaseDirectory: false); - - return await File.ReadAllBytesAsync(zipPath); + // Zip the publish output to a UNIQUE temp path. A given function (e.g. 
ApproverFunction) is + // the external function for more than one test, so multiple parallel tests zip the same + // published output at once — writing to a shared bin/function.zip raced ("file is being used + // by another process"). The publish output itself is read-only and shared safely; only the + // zip destination needs to be per-call. On Linux (CI) ZipFile preserves the bootstrap exec + // bit; on Windows the managed runtime tolerates the missing bit. + var zipPath = Path.Combine(Path.GetTempPath(), $"durable-integ-fn-{Guid.NewGuid():N}.zip"); + try + { + ZipFile.CreateFromDirectory(publishDir, zipPath, CompressionLevel.Optimal, includeBaseDirectory: false); + return await File.ReadAllBytesAsync(zipPath); + } + finally + { + try { File.Delete(zipPath); } catch { /* best effort */ } + } } /// diff --git a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/DeploymentScript.ps1 b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/DeploymentScript.ps1 index 3a20e55de..f64ccc6a9 100644 --- a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/DeploymentScript.ps1 +++ b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/DeploymentScript.ps1 @@ -42,7 +42,32 @@ try $json = Get-Content .\aws-lambda-tools-defaults.json | Out-String | ConvertFrom-Json $region = $json.region - dotnet tool install -g Amazon.Lambda.Tools + # Install Amazon.Lambda.Tools idempotently. The integration test projects deploy in parallel, + # so several DeploymentScript.ps1 processes may run "dotnet tool install -g" at the same time and + # collide on the global tool store ("a file or directory with the same name already exists"). + # Skip if already present, and tolerate the concurrent-install race by treating an + # already-installed/already-exists result as success, with a short retry for the transient case. + if (dotnet tool list -g | Select-String -SimpleMatch 'amazon.lambda.tools') + { + Write-Host "Amazon.Lambda.Tools already installed." 
+ } + else + { + for ($i = 1; $i -le 5; $i++) + { + $output = dotnet tool install -g Amazon.Lambda.Tools 2>&1 | Out-String + Write-Host $output + if ($LASTEXITCODE -eq 0 -or $output -match 'already installed' -or $output -match 'already exists') + { + break + } + if ($i -eq 5) + { + throw "Failed to install Amazon.Lambda.Tools after $i attempts." + } + Start-Sleep -Seconds ($i * 3) + } + } Write-Host "Creating S3 Bucket $identifier" if(![string]::IsNullOrEmpty($region)) diff --git a/Libraries/test/TestServerlessApp.ALB.IntegrationTests/DeploymentScript.ps1 b/Libraries/test/TestServerlessApp.ALB.IntegrationTests/DeploymentScript.ps1 index f74ee365f..f5e9e463d 100644 --- a/Libraries/test/TestServerlessApp.ALB.IntegrationTests/DeploymentScript.ps1 +++ b/Libraries/test/TestServerlessApp.ALB.IntegrationTests/DeploymentScript.ps1 @@ -42,7 +42,32 @@ try $json = Get-Content .\aws-lambda-tools-defaults.json | Out-String | ConvertFrom-Json $region = $json.region - dotnet tool install -g Amazon.Lambda.Tools + # Install Amazon.Lambda.Tools idempotently. The integration test projects deploy in parallel, + # so several DeploymentScript.ps1 processes may run "dotnet tool install -g" at the same time and + # collide on the global tool store ("a file or directory with the same name already exists"). + # Skip if already present, and tolerate the concurrent-install race by treating an + # already-installed/already-exists result as success, with a short retry for the transient case. + if (dotnet tool list -g | Select-String -SimpleMatch 'amazon.lambda.tools') + { + Write-Host "Amazon.Lambda.Tools already installed." + } + else + { + for ($i = 1; $i -le 5; $i++) + { + $output = dotnet tool install -g Amazon.Lambda.Tools 2>&1 | Out-String + Write-Host $output + if ($LASTEXITCODE -eq 0 -or $output -match 'already installed' -or $output -match 'already exists') + { + break + } + if ($i -eq 5) + { + throw "Failed to install Amazon.Lambda.Tools after $i attempts." 
+ } + Start-Sleep -Seconds ($i * 3) + } + } Write-Host "Creating S3 Bucket $identifier" if(![string]::IsNullOrEmpty($region)) diff --git a/Libraries/test/TestServerlessApp.IntegrationTests/DeploymentScript.ps1 b/Libraries/test/TestServerlessApp.IntegrationTests/DeploymentScript.ps1 index bbff35b47..5802e5cbd 100644 --- a/Libraries/test/TestServerlessApp.IntegrationTests/DeploymentScript.ps1 +++ b/Libraries/test/TestServerlessApp.IntegrationTests/DeploymentScript.ps1 @@ -42,7 +42,32 @@ try $json = Get-Content .\aws-lambda-tools-defaults.json | Out-String | ConvertFrom-Json $region = $json.region - dotnet tool install -g Amazon.Lambda.Tools + # Install Amazon.Lambda.Tools idempotently. The integration test projects deploy in parallel, + # so several DeploymentScript.ps1 processes may run "dotnet tool install -g" at the same time and + # collide on the global tool store ("a file or directory with the same name already exists"). + # Skip if already present, and tolerate the concurrent-install race by treating an + # already-installed/already-exists result as success, with a short retry for the transient case. + if (dotnet tool list -g | Select-String -SimpleMatch 'amazon.lambda.tools') + { + Write-Host "Amazon.Lambda.Tools already installed." + } + else + { + for ($i = 1; $i -le 5; $i++) + { + $output = dotnet tool install -g Amazon.Lambda.Tools 2>&1 | Out-String + Write-Host $output + if ($LASTEXITCODE -eq 0 -or $output -match 'already installed' -or $output -match 'already exists') + { + break + } + if ($i -eq 5) + { + throw "Failed to install Amazon.Lambda.Tools after $i attempts." 
+ } + Start-Sleep -Seconds ($i * 3) + } + } Write-Host "Creating S3 Bucket $identifier" if(![string]::IsNullOrEmpty($region)) From 181894fd060936b34c740ca77552b1017e9a58f7 Mon Sep 17 00:00:00 2001 From: Garrett Beatty Date: Mon, 29 Jun 2026 10:44:34 -0400 Subject: [PATCH 09/12] Build integ test projects once before parallel run to avoid shared-output race MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI failed with 'GenerateDepsFile task failed unexpectedly ... IntegrationTests.Helpers.deps.json is being used by another process'. The integration test projects share the IntegrationTests.Helpers ProjectReference; running 'dotnet test' on them in parallel made each run rebuild that shared project concurrently, racing on its build output. Build all projects once, serially, before the parallel phase, then run the parallel 'dotnet test' with --no-build so the concurrent runs only execute tests and never rebuild shared output. The shared helper is built once; subsequent up-front builds are no-ops. (The previous run also confirmed the tool-install fix works: the 'already exists' message is now tolerated and deployment continues — that path is no longer fatal.) --- buildtools/run-integ-tests-parallel.ps1 | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/buildtools/run-integ-tests-parallel.ps1 b/buildtools/run-integ-tests-parallel.ps1 index b87b88295..cbb06f141 100644 --- a/buildtools/run-integ-tests-parallel.ps1 +++ b/buildtools/run-integ-tests-parallel.ps1 @@ -31,13 +31,30 @@ if (-not $projects) Write-Host "Running $($projects.Count) integration test project(s) in parallel (throttle limit $ThrottleLimit):" $projects | ForEach-Object { Write-Host " - $_" } +# Build all projects ONCE, up front, before the parallel phase. 
The integration test projects share +# the IntegrationTests.Helpers ProjectReference; running `dotnet test` on them concurrently each +# rebuilt that shared project, racing on its build output (e.g. IntegrationTests.Helpers.deps.json +# "being used by another process", GenerateDepsFile task failure). Building once here lets the +# parallel runs use --no-build so they only execute tests, never rebuild shared output. +Write-Host "Building all integration test projects once before running in parallel..." +foreach ($project in $projects) +{ + dotnet build -c $Configuration $project + if ($LASTEXITCODE -ne 0) + { + Write-Host "Build failed for $project (exit $LASTEXITCODE)." + exit 1 + } +} + $results = $projects | ForEach-Object -ThrottleLimit $ThrottleLimit -Parallel { $project = $_ $name = [System.IO.Path]::GetFileNameWithoutExtension($project) $lines = [System.Collections.Generic.List[string]]::new() - # 2>&1 folds stderr into the stream. Each line is emitted to the host as it arrives, prefixed - # with the project name, so progress is visible during the (long) run instead of only at the end. - dotnet test -c $using:Configuration --logger "console;verbosity=detailed" $project 2>&1 | + # --no-build: everything was built serially above, so the parallel runs only execute tests and + # never rebuild the shared IntegrationTests.Helpers project. 2>&1 folds stderr into the stream; + # each line is emitted as it arrives, prefixed with the project name, so progress is visible. 
+ dotnet test -c $using:Configuration --no-build --logger "console;verbosity=detailed" $project 2>&1 | ForEach-Object { $line = $_.ToString() $lines.Add($line) From 9e6af88831d03483e83352c44d20753effacc22d Mon Sep 17 00:00:00 2001 From: Garrett Beatty Date: Mon, 29 Jun 2026 11:25:05 -0400 Subject: [PATCH 10/12] Fix flaky streaming test: isolate multi-concurrency AsyncLocal writes (test-only) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit StreamingE2EWithMoq.Streaming_AllDataTransmitted_ContentRoundTrip flaked in CI (Assert.NotNull(output) — CapturedHttpBytes was null) only in full-assembly runs, never in isolation. Root cause is cross-test contamination of ResponseStreamFactory's static state. The factory tracks the active invocation in a static field (_onDemandContext, on-demand mode) or an AsyncLocal (_asyncLocalContext, multi-concurrency mode), and GetCurrentContext() prefers the AsyncLocal. Several ResponseStreamFactoryTests called InitializeInvocation(isMultiConcurrency: true) synchronously on the xUnit worker thread, mutating that thread's ExecutionContext; because xUnit reuses thread-pool threads, the AsyncLocal value could remain visible to a later on-demand test. When that test's handler called CreateStream(), GetCurrentContext() returned the stale AsyncLocal context instead of the on-demand one, so the bootstrap's on-demand GetStreamIfCreated() saw no stream and the response silently fell back to the buffered path — CapturedHttpBytes stayed null. Fix is test-only (no shipping code changed): run the multi-concurrency tests that write the AsyncLocal on isolated Task.Run flows so the mutation is confined to a throwaway ExecutionContext and cannot leak across xUnit's reused threads — the same pattern the StreamingE2EWithMoq multi-concurrency tests already use. The streaming tests also reset factory state before each run as belt-and-suspenders. 
Verified: the failure reproduced ~40% of full-assembly runs before (2/5); after, 12/12 full-assembly runs pass. --- .../ResponseStreamFactoryTests.cs | 66 ++++++++++++------- .../StreamingE2EWithMoq.cs | 10 ++- 2 files changed, 51 insertions(+), 25 deletions(-) diff --git a/Libraries/test/Amazon.Lambda.RuntimeSupport.Tests/Amazon.Lambda.RuntimeSupport.UnitTests/ResponseStreamFactoryTests.cs b/Libraries/test/Amazon.Lambda.RuntimeSupport.Tests/Amazon.Lambda.RuntimeSupport.UnitTests/ResponseStreamFactoryTests.cs index 0b49eb27a..52c6cfd92 100644 --- a/Libraries/test/Amazon.Lambda.RuntimeSupport.Tests/Amazon.Lambda.RuntimeSupport.UnitTests/ResponseStreamFactoryTests.cs +++ b/Libraries/test/Amazon.Lambda.RuntimeSupport.Tests/Amazon.Lambda.RuntimeSupport.UnitTests/ResponseStreamFactoryTests.cs @@ -90,15 +90,21 @@ public void CreateStream_OnDemandMode_ReturnsValidStream() /// Validates: Requirements 1.3, 2.2, 2.3 /// [Fact] - public void CreateStream_MultiConcurrencyMode_ReturnsValidStream() + public Task CreateStream_MultiConcurrencyMode_ReturnsValidStream() { - var mock = new MockStreamingRuntimeApiClient(); - InitializeWithMock("req-2", isMultiConcurrency: true, mock); + // Run on an isolated execution-context flow (Task.Run) so the multi-concurrency + // AsyncLocal context this writes does not leak onto the reused xUnit worker thread and + // contaminate a later on-demand test (see StreamingE2EWithMoq flake). 
+ return Task.Run(() => + { + var mock = new MockStreamingRuntimeApiClient(); + InitializeWithMock("req-2", isMultiConcurrency: true, mock); - var stream = ResponseStreamFactory.CreateStream(Array.Empty()); + var stream = ResponseStreamFactory.CreateStream(Array.Empty()); - Assert.NotNull(stream); - Assert.IsAssignableFrom(stream); + Assert.NotNull(stream); + Assert.IsAssignableFrom(stream); + }); } // --- Property 4: Single Stream Per Invocation --- @@ -192,15 +198,21 @@ public void InitializeInvocation_OnDemand_SetsUpContext() } [Fact] - public void InitializeInvocation_MultiConcurrency_SetsUpContext() + public Task InitializeInvocation_MultiConcurrency_SetsUpContext() { - var mock = new MockStreamingRuntimeApiClient(); - InitializeWithMock("req-5", isMultiConcurrency: true, mock); + // Run on an isolated execution-context flow (Task.Run) so the multi-concurrency + // AsyncLocal context this writes does not leak onto the reused xUnit worker thread and + // contaminate a later on-demand test (see StreamingE2EWithMoq flake). 
+ return Task.Run(() => + { + var mock = new MockStreamingRuntimeApiClient(); + InitializeWithMock("req-5", isMultiConcurrency: true, mock); - Assert.Null(ResponseStreamFactory.GetStreamIfCreated(isMultiConcurrency: true)); + Assert.Null(ResponseStreamFactory.GetStreamIfCreated(isMultiConcurrency: true)); - var stream = ResponseStreamFactory.CreateStream(Array.Empty()); - Assert.NotNull(stream); + var stream = ResponseStreamFactory.CreateStream(Array.Empty()); + Assert.NotNull(stream); + }); } [Fact] @@ -264,21 +276,27 @@ public void StateIsolation_SequentialInvocations_NoLeakage() /// Validates: Requirements 2.9, 2.10 /// [Fact] - public async Task StateIsolation_MultiConcurrency_UsesAsyncLocal() + public Task StateIsolation_MultiConcurrency_UsesAsyncLocal() { - var mock = new MockStreamingRuntimeApiClient(); - InitializeWithMock("req-9", isMultiConcurrency: true, mock); - var stream = ResponseStreamFactory.CreateStream(Array.Empty()); - Assert.NotNull(stream); - - bool childSawNull = false; - await Task.Run(() => + // Run the whole body on an isolated execution-context flow (Task.Run) so the + // multi-concurrency AsyncLocal context written here does not leak onto the reused xUnit + // worker thread and contaminate a later on-demand test (see StreamingE2EWithMoq flake). 
+ return Task.Run(async () => { - ResponseStreamFactory.CleanupInvocation(isMultiConcurrency: true); - childSawNull = ResponseStreamFactory.GetStreamIfCreated(isMultiConcurrency: true) == null; + var mock = new MockStreamingRuntimeApiClient(); + InitializeWithMock("req-9", isMultiConcurrency: true, mock); + var stream = ResponseStreamFactory.CreateStream(Array.Empty()); + Assert.NotNull(stream); + + bool childSawNull = false; + await Task.Run(() => + { + ResponseStreamFactory.CleanupInvocation(isMultiConcurrency: true); + childSawNull = ResponseStreamFactory.GetStreamIfCreated(isMultiConcurrency: true) == null; + }); + + Assert.True(childSawNull); }); - - Assert.True(childSawNull); } } } diff --git a/Libraries/test/Amazon.Lambda.RuntimeSupport.Tests/Amazon.Lambda.RuntimeSupport.UnitTests/StreamingE2EWithMoq.cs b/Libraries/test/Amazon.Lambda.RuntimeSupport.Tests/Amazon.Lambda.RuntimeSupport.UnitTests/StreamingE2EWithMoq.cs index 6f6d6492c..d2f448d2a 100644 --- a/Libraries/test/Amazon.Lambda.RuntimeSupport.Tests/Amazon.Lambda.RuntimeSupport.UnitTests/StreamingE2EWithMoq.cs +++ b/Libraries/test/Amazon.Lambda.RuntimeSupport.Tests/Amazon.Lambda.RuntimeSupport.UnitTests/StreamingE2EWithMoq.cs @@ -37,7 +37,15 @@ public class RuntimeSupportStateCheckCollection { } [Collection("RuntimeSupportStateCheck")] public class StreamingE2EWithMoq : IDisposable { - public void Dispose() + // Reset the factory's static/async-local state before AND after each test so these tests + // start from a clean slate. The root cause of the cross-test leak (multi-concurrency tests + // writing the AsyncLocal on a reused xUnit worker thread) is contained at its source by + // running those tests on isolated Task.Run flows; this reset is belt-and-suspenders. 
+ public StreamingE2EWithMoq() => ResetFactoryState(); + + public void Dispose() => ResetFactoryState(); + + private static void ResetFactoryState() { ResponseStreamFactory.CleanupInvocation(isMultiConcurrency: false); ResponseStreamFactory.CleanupInvocation(isMultiConcurrency: true); From e68642c08b742e89e5335cc3be0ae09e978e876f Mon Sep 17 00:00:00 2001 From: Garrett Beatty Date: Mon, 29 Jun 2026 12:13:33 -0400 Subject: [PATCH 11/12] Fix parallel-restore race in durable pre-publish: restore before parallel publish MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI failed in the durable pre-publish step with NuGet error: 'The file .../Amazon.Lambda.Serialization.SystemTextJson/obj/project.assets.json already exists.' The single-MSBuild-pass traversal published all function projects with Targets=Restore;Publish and BuildInParallel=true. Restore is not parallel-safe: the function projects share src ProjectReferences (Serialization.SystemTextJson, DurableExecution, Core, RuntimeSupport), so restoring them concurrently raced on the shared obj/project.assets.json. Split into two passes inside the traversal: a single non-parallel Restore across all projects (writes each shared project's assets once), then the parallel Publish (restore already done, so no shared-output race). Verified from a fully cold state (function + shared src obj dirs nuked) — 51/51 functions publish with 0 'already exists' errors. 
--- .../DurableFunctionDeployment.cs | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/DurableFunctionDeployment.cs b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/DurableFunctionDeployment.cs index 3116c1686..17cd444c6 100644 --- a/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/DurableFunctionDeployment.cs +++ b/Libraries/test/Amazon.Lambda.DurableExecution.IntegrationTests/DurableFunctionDeployment.cs @@ -718,12 +718,22 @@ private async Task EnsureAllFunctionsPublishedAsync() {itemsXml} - + + + From 28c1805e80c2947ba5c69f6a32beaaa221ef329d Mon Sep 17 00:00:00 2001 From: Garrett Beatty Date: Mon, 29 Jun 2026 13:27:45 -0400 Subject: [PATCH 12/12] Fix flaky REST API authorizer integration tests The TestCustomAuthorizerApp REST API (API Gateway v1) valid-auth tests (RestUserInfo_WithValidAuth, SimpleRestApiUserInfo_WithValidAuth) intermittently returned 403 instead of 200. API Gateway returns 403 on the authorizer allow path when the Lambda authorizer wiring has not finished propagating to the endpoint being hit. Three compounding causes, fixed at three layers: - Root cause: AnnotationsRestApi had no EndpointConfiguration, so SAM defaulted to EDGE-optimized. Edge endpoints front through CloudFront and propagate over minutes, unevenly across edge PoPs, so a warmed endpoint could still 403 on a request that hit a different PoP. Set the REST API to REGIONAL (invoke URL format unchanged). The generator never writes EndpointConfiguration, so this survives template regeneration. - Warm-up coverage gap: WarmUpApisAsync only warmed 2 of 4 authorizers and never warmed SimpleRestAuthorizer. Now warms one allow path per distinct authorizer, REST endpoints first (they settle slower than HTTP v2). 
- Per-test resilience: add RetryHelper.SendWithRetryOnForbiddenAsync (takes a request factory since HttpRequestMessage cannot be resent) and a GetWithValidTokenAsync fixture helper. All 9 allow-path tests now retry a transient 403 instead of failing. Deny/no-auth/partial-context tests, which legitimately expect 403/401, are unchanged. Verified locally: all 20 tests pass, stack deploys first try with regional REST API. --- .../IntegrationTests.Helpers/RetryHelper.cs | 41 +++++++++++++++ .../HttpApiV1Tests.cs | 8 +-- .../HttpApiV2Tests.cs | 24 +++------ .../IntegrationTestContextFixture.cs | 52 +++++++++++++++---- .../NonStringAuthorizerTests.cs | 24 +++------ .../RestApiTests.cs | 8 +-- .../SimpleHttpApiAuthorizerTests.cs | 8 +-- .../SimpleRestApiAuthorizerTests.cs | 8 +-- .../serverless.template | 3 ++ 9 files changed, 105 insertions(+), 71 deletions(-) diff --git a/Libraries/test/IntegrationTests.Helpers/RetryHelper.cs b/Libraries/test/IntegrationTests.Helpers/RetryHelper.cs index 649b53716..895d9d1d4 100644 --- a/Libraries/test/IntegrationTests.Helpers/RetryHelper.cs +++ b/Libraries/test/IntegrationTests.Helpers/RetryHelper.cs @@ -2,6 +2,8 @@ // SPDX-License-Identifier: Apache-2.0 using System; +using System.Net; +using System.Net.Http; using System.Threading.Tasks; namespace IntegrationTests.Helpers @@ -11,6 +13,45 @@ namespace IntegrationTests.Helpers /// </summary> public static class RetryHelper { + /// <summary> + /// Sends an HTTP request, retrying while API Gateway returns <see cref="HttpStatusCode.Forbidden"/> + /// on what is expected to be an authorized request. A freshly deployed API Gateway stage can + /// transiently 403 on the authorizer "allow" path until the Lambda authorizer wiring has fully + /// propagated; once propagated the request returns a stable non-403 status. Because + /// <see cref="HttpRequestMessage"/> cannot be resent, the caller supplies a factory that builds a + /// fresh request for each attempt. + /// </summary> + /// <param name="httpClient">The client used to send the request.</param> + /// <param name="requestFactory">Builds a fresh request for each attempt.</param>
+ /// <param name="timeout">Maximum total time to keep retrying. Defaults to 2 minutes.</param> + /// <param name="pollInterval">Delay between attempts. Defaults to 5 seconds.</param> + /// <returns>The first non-403 response, or the last 403 response if the timeout elapses.</returns> + public static async Task SendWithRetryOnForbiddenAsync( + HttpClient httpClient, + Func requestFactory, + TimeSpan? timeout = null, + TimeSpan? pollInterval = null) + { + if (httpClient == null) throw new ArgumentNullException(nameof(httpClient)); + if (requestFactory == null) throw new ArgumentNullException(nameof(requestFactory)); + + var interval = pollInterval ?? TimeSpan.FromSeconds(5); + var deadline = DateTime.UtcNow + (timeout ?? TimeSpan.FromMinutes(2)); + + HttpResponseMessage response; + while (true) + { + response = await httpClient.SendAsync(requestFactory()); + if (response.StatusCode != HttpStatusCode.Forbidden || DateTime.UtcNow >= deadline) + { + return response; + } + + response.Dispose(); + await Task.Delay(interval); + } + } + /// /// Polls until it returns true or elapses. /// Useful for gating tests on resources that report ready (e.g.
CloudFormation CREATE_COMPLETE) diff --git a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/HttpApiV1Tests.cs b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/HttpApiV1Tests.cs index 51eb3a0a0..ea122eef0 100644 --- a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/HttpApiV1Tests.cs +++ b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/HttpApiV1Tests.cs @@ -33,12 +33,8 @@ public HttpApiV1Tests(IntegrationTestContextFixture fixture) [Fact] public async Task HttpApiV1UserInfo_WithValidAuth_ReturnsAuthorizerContext() { - // Arrange - var request = new HttpRequestMessage(HttpMethod.Get, $"{_fixture.HttpApiUrl}/api/http-v1-user-info"); - request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", "valid-token"); - - // Act - var response = await _fixture.HttpClient.SendAsync(request); + // Act - retry on transient 403 while the freshly deployed authorizer wiring propagates + var response = await _fixture.GetWithValidTokenAsync($"{_fixture.HttpApiUrl}/api/http-v1-user-info"); // Assert Assert.Equal(HttpStatusCode.OK, response.StatusCode); diff --git a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/HttpApiV2Tests.cs b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/HttpApiV2Tests.cs index 6bf1df605..c7f280c95 100644 --- a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/HttpApiV2Tests.cs +++ b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/HttpApiV2Tests.cs @@ -31,12 +31,8 @@ public HttpApiV2Tests(IntegrationTestContextFixture fixture) [Fact] public async Task ProtectedEndpoint_WithValidAuth_ReturnsSuccess() { - // Arrange - var request = new HttpRequestMessage(HttpMethod.Get, $"{_fixture.HttpApiUrl}/api/protected"); - request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", "valid-token"); - - // Act - var response = await _fixture.HttpClient.SendAsync(request); + // Act - retry on transient 403 while the freshly deployed authorizer wiring propagates + var response = await 
_fixture.GetWithValidTokenAsync($"{_fixture.HttpApiUrl}/api/protected"); // Assert Assert.Equal(HttpStatusCode.OK, response.StatusCode); @@ -56,12 +52,8 @@ public async Task ProtectedEndpoint_WithValidAuth_ReturnsSuccess() [Fact] public async Task UserInfo_WithValidAuth_ReturnsAuthorizerContext() { - // Arrange - var request = new HttpRequestMessage(HttpMethod.Get, $"{_fixture.HttpApiUrl}/api/user-info"); - request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", "valid-token"); - - // Act - var response = await _fixture.HttpClient.SendAsync(request); + // Act - retry on transient 403 while the freshly deployed authorizer wiring propagates + var response = await _fixture.GetWithValidTokenAsync($"{_fixture.HttpApiUrl}/api/user-info"); // Assert Assert.Equal(HttpStatusCode.OK, response.StatusCode); @@ -97,12 +89,8 @@ public async Task UserInfo_WithMissingAuthorizerContextKey_ReturnsUnauthorized() [Fact] public async Task IHttpResult_WithValidAuth_ReturnsSuccess() { - // Arrange - var request = new HttpRequestMessage(HttpMethod.Get, $"{_fixture.HttpApiUrl}/api/ihttpresult-user-info"); - request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", "valid-token"); - - // Act - var response = await _fixture.HttpClient.SendAsync(request); + // Act - retry on transient 403 while the freshly deployed authorizer wiring propagates + var response = await _fixture.GetWithValidTokenAsync($"{_fixture.HttpApiUrl}/api/ihttpresult-user-info"); // Assert Assert.Equal(HttpStatusCode.OK, response.StatusCode); diff --git a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/IntegrationTestContextFixture.cs b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/IntegrationTestContextFixture.cs index 71c91b782..31c041122 100644 --- a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/IntegrationTestContextFixture.cs +++ b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/IntegrationTestContextFixture.cs @@ -100,18 +100,49 @@ public async Task 
InitializeAsync() } /// <summary> - /// Polls a representative "allow path" endpoint on each deployed API until the custom authorizer + /// Sends an authenticated GET (valid-token) to <paramref name="url"/>, retrying on a transient 403 + /// from API Gateway. A freshly deployed stage can briefly 403 on the authorizer "allow" path until + /// the Lambda authorizer wiring finishes propagating; this resends until a stable non-403 response + /// (200 or 401) is returned. Use this for any test that asserts an authorized request succeeds. + /// </summary> + public Task GetWithValidTokenAsync(string url) + { + return RetryHelper.SendWithRetryOnForbiddenAsync(HttpClient, () => + { + var request = new HttpRequestMessage(HttpMethod.Get, url); + request.Headers.Authorization = new System.Net.Http.Headers.AuthenticationHeaderValue("Bearer", "valid-token"); + return request; + }); + } + + /// <summary> + /// Polls every authorizer "allow path" endpoint on each deployed API until the custom authorizer /// is fully wired and the request succeeds (or a 401 from the backend), confirming the API is /// serving traffic before the test suite runs. + /// + /// Every distinct authorizer must be warmed individually: API Gateway propagates each authorizer's + /// invoke wiring separately, so warming one endpoint does not guarantee a sibling authorizer on the + /// same API is ready. The REST API endpoints are listed first because REST (v1) stages settle slower + /// than HTTP (v2). /// </summary> private async Task WarmUpApisAsync() { var timeout = TimeSpan.FromMinutes(2); var pollInterval = TimeSpan.FromSeconds(5); // A warmed-up authorizer returns a non-403 response on the allow path: either 200 (context - // present) or 401 (backend rejects missing context). A 403 means API Gateway could not yet - // invoke/attach the authorizer, so keep waiting. + // One representative allow-path endpoint per distinct authorizer.
A warmed-up authorizer returns + // a non-403 response on the allow path: either 200 (context present) or 401 (backend rejects + // missing context). A 403 means API Gateway could not yet invoke/attach the authorizer, so keep + // waiting. + var allowPaths = new[] + { + $"{RestApiUrl}/api/rest-user-info", // RestApiAuthorizer (REST API token authorizer) + $"{RestApiUrl}/api/simple-restapi-user-info",// SimpleRestAuthorizer (IAuthorizerResult REST authorizer) + $"{HttpApiUrl}/api/user-info", // CustomAuthorizer (HTTP API v2) + $"{HttpApiUrl}/api/http-v1-user-info", // CustomAuthorizerV1 (HTTP API v1) + $"{HttpApiUrl}/api/simple-httpapi-user-info" // SimpleAuthorizer (IAuthorizerResult HTTP authorizer) + }; + async Task EndpointIsReady(string url) { var request = new HttpRequestMessage(HttpMethod.Get, url); @@ -120,13 +151,12 @@ async Task EndpointIsReady(string url) return response.StatusCode != System.Net.HttpStatusCode.Forbidden; } - var restReady = await RetryHelper.WaitForConditionAsync( - () => EndpointIsReady($"{RestApiUrl}/api/rest-user-info"), timeout, pollInterval); - Console.WriteLine($"[IntegrationTest] REST API warm-up {(restReady ? "succeeded" : "timed out")}."); - - var httpReady = await RetryHelper.WaitForConditionAsync( - () => EndpointIsReady($"{HttpApiUrl}/api/user-info"), timeout, pollInterval); - Console.WriteLine($"[IntegrationTest] HTTP API warm-up {(httpReady ? "succeeded" : "timed out")}."); + foreach (var url in allowPaths) + { + var ready = await RetryHelper.WaitForConditionAsync( + () => EndpointIsReady(url), timeout, pollInterval); + Console.WriteLine($"[IntegrationTest] Warm-up for '{url}' {(ready ? 
"succeeded" : "timed out")}."); + } } public async Task DisposeAsync() diff --git a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/NonStringAuthorizerTests.cs b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/NonStringAuthorizerTests.cs index c61670586..bb9d7c77e 100644 --- a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/NonStringAuthorizerTests.cs +++ b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/NonStringAuthorizerTests.cs @@ -35,12 +35,8 @@ public NonStringAuthorizerTests(IntegrationTestContextFixture fixture) [Fact] public async Task NonStringUserInfo_WithValidAuth_ReturnsConvertedValues() { - // Arrange - var request = new HttpRequestMessage(HttpMethod.Get, $"{_fixture.HttpApiUrl}/api/nonstring-user-info"); - request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", "valid-token"); - - // Act - var response = await _fixture.HttpClient.SendAsync(request); + // Act - retry on transient 403 while the freshly deployed authorizer wiring propagates + var response = await _fixture.GetWithValidTokenAsync($"{_fixture.HttpApiUrl}/api/nonstring-user-info"); // Assert Assert.Equal(HttpStatusCode.OK, response.StatusCode); @@ -90,12 +86,8 @@ public async Task NonStringUserInfo_WithMissingAuthorizerContextKey_ReturnsUnaut [Fact] public async Task NonStringUserInfo_IntValueIsCorrectType() { - // Arrange - var request = new HttpRequestMessage(HttpMethod.Get, $"{_fixture.HttpApiUrl}/api/nonstring-user-info"); - request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", "valid-token"); - - // Act - var response = await _fixture.HttpClient.SendAsync(request); + // Act - retry on transient 403 while the freshly deployed authorizer wiring propagates + var response = await _fixture.GetWithValidTokenAsync($"{_fixture.HttpApiUrl}/api/nonstring-user-info"); // Assert Assert.Equal(HttpStatusCode.OK, response.StatusCode); @@ -114,12 +106,8 @@ public async Task NonStringUserInfo_IntValueIsCorrectType() [Fact] public async 
Task NonStringUserInfo_BoolValueIsCorrectType() { - // Arrange - var request = new HttpRequestMessage(HttpMethod.Get, $"{_fixture.HttpApiUrl}/api/nonstring-user-info"); - request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", "valid-token"); - - // Act - var response = await _fixture.HttpClient.SendAsync(request); + // Act - retry on transient 403 while the freshly deployed authorizer wiring propagates + var response = await _fixture.GetWithValidTokenAsync($"{_fixture.HttpApiUrl}/api/nonstring-user-info"); // Assert Assert.Equal(HttpStatusCode.OK, response.StatusCode); diff --git a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/RestApiTests.cs b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/RestApiTests.cs index 1bc01991c..26b7b7c22 100644 --- a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/RestApiTests.cs +++ b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/RestApiTests.cs @@ -34,12 +34,8 @@ public RestApiTests(IntegrationTestContextFixture fixture) [Fact] public async Task RestUserInfo_WithValidAuth_ReturnsAuthorizerContext() { - // Arrange - var request = new HttpRequestMessage(HttpMethod.Get, $"{_fixture.RestApiUrl}/api/rest-user-info"); - request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", "valid-token"); - - // Act - var response = await _fixture.HttpClient.SendAsync(request); + // Act - retry on transient 403 while the freshly deployed authorizer wiring propagates + var response = await _fixture.GetWithValidTokenAsync($"{_fixture.RestApiUrl}/api/rest-user-info"); // Assert Assert.Equal(HttpStatusCode.OK, response.StatusCode); diff --git a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/SimpleHttpApiAuthorizerTests.cs b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/SimpleHttpApiAuthorizerTests.cs index 34764731e..5d36e0625 100644 --- a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/SimpleHttpApiAuthorizerTests.cs +++ 
b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/SimpleHttpApiAuthorizerTests.cs @@ -34,12 +34,8 @@ public SimpleHttpApiAuthorizerTests(IntegrationTestContextFixture fixture) [Fact] public async Task SimpleHttpApiUserInfo_WithValidAuth_ReturnsAuthorizerContext() { - // Arrange - var request = new HttpRequestMessage(HttpMethod.Get, $"{_fixture.HttpApiUrl}/api/simple-httpapi-user-info"); - request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", "valid-token"); - - // Act - var response = await _fixture.HttpClient.SendAsync(request); + // Act - retry on transient 403 while the freshly deployed authorizer wiring propagates + var response = await _fixture.GetWithValidTokenAsync($"{_fixture.HttpApiUrl}/api/simple-httpapi-user-info"); // Assert Assert.Equal(HttpStatusCode.OK, response.StatusCode); diff --git a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/SimpleRestApiAuthorizerTests.cs b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/SimpleRestApiAuthorizerTests.cs index 8cacdcd7f..ffbdaf5b8 100644 --- a/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/SimpleRestApiAuthorizerTests.cs +++ b/Libraries/test/TestCustomAuthorizerApp.IntegrationTests/SimpleRestApiAuthorizerTests.cs @@ -36,12 +36,8 @@ public SimpleRestApiAuthorizerTests(IntegrationTestContextFixture fixture) [Fact] public async Task SimpleRestApiUserInfo_WithValidAuth_ReturnsAuthorizerContext() { - // Arrange - var request = new HttpRequestMessage(HttpMethod.Get, $"{_fixture.RestApiUrl}/api/simple-restapi-user-info"); - request.Headers.Authorization = new AuthenticationHeaderValue("Bearer", "valid-token"); - - // Act - var response = await _fixture.HttpClient.SendAsync(request); + // Act - retry on transient 403 while the freshly deployed authorizer wiring propagates + var response = await _fixture.GetWithValidTokenAsync($"{_fixture.RestApiUrl}/api/simple-restapi-user-info"); // Assert Assert.Equal(HttpStatusCode.OK, response.StatusCode); diff --git 
a/Libraries/test/TestCustomAuthorizerApp/serverless.template b/Libraries/test/TestCustomAuthorizerApp/serverless.template index cced3af22..de85be513 100644 --- a/Libraries/test/TestCustomAuthorizerApp/serverless.template +++ b/Libraries/test/TestCustomAuthorizerApp/serverless.template @@ -70,6 +70,9 @@ "Type": "AWS::Serverless::Api", "Properties": { "StageName": "Prod", + "EndpointConfiguration": { + "Type": "REGIONAL" + }, "Auth": { "Authorizers": { "RestApiAuthorizer": {