From 8d46f4661a8abf2b9aae0dbcddff26baf24bbb95 Mon Sep 17 00:00:00 2001 From: Rhys Cox Date: Tue, 14 Apr 2026 21:06:12 +0100 Subject: [PATCH 01/65] CCM-16073 - Enhanced callbacks --- eslint.config.mjs | 3 +- .../terraform/components/callbacks/README.md | 9 +- .../callbacks/cloudwatch_eventbus_main.tf | 6 + .../cloudwatch_metric_alarm_dlq_depth.tf | 31 -- .../callbacks/elasticache_delivery_state.tf | 178 +++++++ .../terraform/components/callbacks/locals.tf | 48 +- .../callbacks/module_client_delivery.tf | 46 ++ .../callbacks/module_client_destination.tf | 17 - .../callbacks/module_mock_webhook_alb_mtls.tf | 136 ++++++ .../callbacks/module_mock_webhook_lambda.tf | 31 -- .../module_transform_filter_lambda.tf | 14 - .../components/callbacks/pipes_pipe_main.tf | 3 +- .../components/callbacks/variables.tf | 42 ++ .../modules/client-delivery/README.md | 69 +++ .../cloudwatch_event_rule_per_subscription.tf | 101 ++++ .../client-delivery/iam_role_sqs_target.tf | 115 +++++ .../modules/client-delivery/locals.tf | 21 + .../client-delivery/module_dlq_per_client.tf | 43 ++ .../module_https_client_lambda.tf | 71 +++ .../client-delivery/module_sqs_per_client.tf | 39 ++ .../modules/client-delivery/outputs.tf | 34 ++ .../modules/client-delivery/variables.tf | 212 +++++++++ .../modules/client-destination/README.md | 32 -- .../cloudwatch_event_api_destination_this.tf | 10 - .../cloudwatch_event_connection_main.tf | 14 - .../cloudwatch_event_rule_main.tf | 46 -- .../iam_role_api_target_role.tf | 83 ---- .../modules/client-destination/locals.tf | 12 - .../client-destination/module_target_dlq.tf | 41 -- .../modules/client-destination/variables.tf | 67 --- knip.ts | 8 + .../package.json | 2 +- .../helpers/client-subscription-fixtures.ts | 2 + .../src/__tests__/index.component.test.ts | 55 +-- .../src/__tests__/index.test.ts | 75 --- .../__tests__/services/config-cache.test.ts | 2 +- .../__tests__/services/config-loader.test.ts | 2 +- .../services/config-update.component.test.ts | 2 +- 
.../__tests__/services/payload-signer.test.ts | 49 -- .../services/ssm-applications-map.test.ts | 156 ------ .../src/handler.ts | 116 +---- .../src/index.ts | 19 +- .../src/services/config-loader-service.ts | 2 +- .../src/services/config-loader.ts | 2 +- .../src/services/observability.ts | 14 +- .../src/services/ssm-applications-map.ts | 85 ---- lambdas/https-client-lambda/jest.config.ts | 9 + lambdas/https-client-lambda/lua-transform.js | 7 + lambdas/https-client-lambda/package.json | 39 ++ .../src/__tests__/config-loader.test.ts | 149 ++++++ .../src/__tests__/delivery-metrics.test.ts | 188 ++++++++ .../__tests__/delivery-observability.test.ts | 115 +++++ .../src/__tests__/dlq-sender.test.ts | 57 +++ .../src/__tests__/endpoint-gate.test.ts | 278 +++++++++++ .../__tests__/fixtures/handler-fixtures.ts | 38 ++ .../src/__tests__/handler.test.ts | 443 ++++++++++++++++++ .../src/__tests__/https-client.test.ts | 260 ++++++++++ .../src/__tests__/index.test.ts | 36 ++ .../src/__tests__/payload-signer.test.ts | 52 ++ .../src/__tests__/retry-policy.test.ts | 175 +++++++ .../src/__tests__/sqs-visibility.test.ts | 71 +++ .../__tests__/ssm-applications-map.test.ts | 117 +++++ .../src/__tests__/tls-agent-factory.test.ts | 364 ++++++++++++++ lambdas/https-client-lambda/src/handler.ts | 222 +++++++++ lambdas/https-client-lambda/src/index.ts | 7 + lambdas/https-client-lambda/src/lua.d.ts | 4 + .../src/services/admit.lua | 95 ++++ .../src/services/config-loader.ts | 77 +++ .../src/services/delivery-metrics.ts | 72 +++ .../src/services/delivery-observability.ts | 63 +++ .../src/services/delivery/https-client.ts | 85 ++++ .../src/services/delivery/retry-policy.ts | 79 ++++ .../services/delivery/tls-agent-factory.ts | 200 ++++++++ .../src/services/dlq-sender.ts | 17 + .../src/services/endpoint-gate.ts | 179 +++++++ .../src/services/logger.ts | 1 + .../src/services/payload-signer.ts | 2 +- .../src/services/record-result.lua | 92 ++++ .../src/services/sqs-visibility.ts | 21 + 
.../src/services/ssm-applications-map.ts | 62 +++ lambdas/https-client-lambda/tsconfig.json | 14 + .../src/__tests__/index.test.ts | 171 +++++++ lambdas/mock-webhook-lambda/src/index.ts | 54 ++- pnpm-lock.yaml | 204 +++++++- pnpm-workspace.yaml | 5 + scripts/config/pre-commit.yaml | 1 + src/config-cache/jest.config.ts | 14 + src/config-cache/package.json | 32 ++ .../src/__tests__/config-cache.test.ts | 75 +++ .../config-cache/src}/config-cache.ts | 0 src/config-cache/src/index.ts | 1 + src/config-cache/tsconfig.json | 14 + .../__tests__/client-config-schema.test.ts | 105 +++++ src/models/src/client-config-schema.ts | 28 ++ src/models/src/client-config.ts | 13 + .../fixtures/subscriptions/mock-client-1.json | 6 + .../fixtures/subscriptions/mock-client-2.json | 12 + .../mock-client-circuit-breaker.json | 40 ++ .../subscriptions/mock-client-mtls.json | 36 ++ .../subscriptions/mock-client-rate-limit.json | 35 ++ tests/integration/helpers/event-factories.ts | 32 ++ .../integration/helpers/mock-client-config.ts | 5 + .../package.json | 1 + .../client-subscription-builder.test.ts | 79 ++++ .../cli/targets-set-certificate.test.ts | 127 +++++ .../entrypoint/cli/targets-set-mtls.test.ts | 107 +++++ .../cli/targets-set-pinning.test.ts | 152 ++++++ .../src/__tests__/fixtures/test-cert.pem | 14 + .../helpers/client-subscription-fixtures.ts | 2 + .../src/domain/client-subscription-builder.ts | 30 ++ .../src/entrypoint/cli/clients-put.ts | 1 - .../entrypoint/cli/targets-set-certificate.ts | 94 ++++ .../src/entrypoint/cli/targets-set-mtls.ts | 101 ++++ .../src/entrypoint/cli/targets-set-pinning.ts | 106 +++++ 114 files changed, 6557 insertions(+), 995 deletions(-) delete mode 100644 infrastructure/terraform/components/callbacks/cloudwatch_metric_alarm_dlq_depth.tf create mode 100644 infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf create mode 100644 infrastructure/terraform/components/callbacks/module_client_delivery.tf delete mode 100644 
infrastructure/terraform/components/callbacks/module_client_destination.tf create mode 100644 infrastructure/terraform/components/callbacks/module_mock_webhook_alb_mtls.tf create mode 100644 infrastructure/terraform/modules/client-delivery/README.md create mode 100644 infrastructure/terraform/modules/client-delivery/cloudwatch_event_rule_per_subscription.tf create mode 100644 infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf create mode 100644 infrastructure/terraform/modules/client-delivery/locals.tf create mode 100644 infrastructure/terraform/modules/client-delivery/module_dlq_per_client.tf create mode 100644 infrastructure/terraform/modules/client-delivery/module_https_client_lambda.tf create mode 100644 infrastructure/terraform/modules/client-delivery/module_sqs_per_client.tf create mode 100644 infrastructure/terraform/modules/client-delivery/outputs.tf create mode 100644 infrastructure/terraform/modules/client-delivery/variables.tf delete mode 100644 infrastructure/terraform/modules/client-destination/README.md delete mode 100644 infrastructure/terraform/modules/client-destination/cloudwatch_event_api_destination_this.tf delete mode 100644 infrastructure/terraform/modules/client-destination/cloudwatch_event_connection_main.tf delete mode 100644 infrastructure/terraform/modules/client-destination/cloudwatch_event_rule_main.tf delete mode 100644 infrastructure/terraform/modules/client-destination/iam_role_api_target_role.tf delete mode 100644 infrastructure/terraform/modules/client-destination/locals.tf delete mode 100644 infrastructure/terraform/modules/client-destination/module_target_dlq.tf delete mode 100644 infrastructure/terraform/modules/client-destination/variables.tf delete mode 100644 lambdas/client-transform-filter-lambda/src/__tests__/services/payload-signer.test.ts delete mode 100644 lambdas/client-transform-filter-lambda/src/__tests__/services/ssm-applications-map.test.ts delete mode 100644 
lambdas/client-transform-filter-lambda/src/services/ssm-applications-map.ts create mode 100644 lambdas/https-client-lambda/jest.config.ts create mode 100644 lambdas/https-client-lambda/lua-transform.js create mode 100644 lambdas/https-client-lambda/package.json create mode 100644 lambdas/https-client-lambda/src/__tests__/config-loader.test.ts create mode 100644 lambdas/https-client-lambda/src/__tests__/delivery-metrics.test.ts create mode 100644 lambdas/https-client-lambda/src/__tests__/delivery-observability.test.ts create mode 100644 lambdas/https-client-lambda/src/__tests__/dlq-sender.test.ts create mode 100644 lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts create mode 100644 lambdas/https-client-lambda/src/__tests__/fixtures/handler-fixtures.ts create mode 100644 lambdas/https-client-lambda/src/__tests__/handler.test.ts create mode 100644 lambdas/https-client-lambda/src/__tests__/https-client.test.ts create mode 100644 lambdas/https-client-lambda/src/__tests__/index.test.ts create mode 100644 lambdas/https-client-lambda/src/__tests__/payload-signer.test.ts create mode 100644 lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts create mode 100644 lambdas/https-client-lambda/src/__tests__/sqs-visibility.test.ts create mode 100644 lambdas/https-client-lambda/src/__tests__/ssm-applications-map.test.ts create mode 100644 lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts create mode 100644 lambdas/https-client-lambda/src/handler.ts create mode 100644 lambdas/https-client-lambda/src/index.ts create mode 100644 lambdas/https-client-lambda/src/lua.d.ts create mode 100644 lambdas/https-client-lambda/src/services/admit.lua create mode 100644 lambdas/https-client-lambda/src/services/config-loader.ts create mode 100644 lambdas/https-client-lambda/src/services/delivery-metrics.ts create mode 100644 lambdas/https-client-lambda/src/services/delivery-observability.ts create mode 100644 
lambdas/https-client-lambda/src/services/delivery/https-client.ts create mode 100644 lambdas/https-client-lambda/src/services/delivery/retry-policy.ts create mode 100644 lambdas/https-client-lambda/src/services/delivery/tls-agent-factory.ts create mode 100644 lambdas/https-client-lambda/src/services/dlq-sender.ts create mode 100644 lambdas/https-client-lambda/src/services/endpoint-gate.ts create mode 100644 lambdas/https-client-lambda/src/services/logger.ts rename lambdas/{client-transform-filter-lambda => https-client-lambda}/src/services/payload-signer.ts (100%) create mode 100644 lambdas/https-client-lambda/src/services/record-result.lua create mode 100644 lambdas/https-client-lambda/src/services/sqs-visibility.ts create mode 100644 lambdas/https-client-lambda/src/services/ssm-applications-map.ts create mode 100644 lambdas/https-client-lambda/tsconfig.json create mode 100644 src/config-cache/jest.config.ts create mode 100644 src/config-cache/package.json create mode 100644 src/config-cache/src/__tests__/config-cache.test.ts rename {lambdas/client-transform-filter-lambda/src/services => src/config-cache/src}/config-cache.ts (100%) create mode 100644 src/config-cache/src/index.ts create mode 100644 src/config-cache/tsconfig.json create mode 100644 tests/integration/fixtures/subscriptions/mock-client-circuit-breaker.json create mode 100644 tests/integration/fixtures/subscriptions/mock-client-mtls.json create mode 100644 tests/integration/fixtures/subscriptions/mock-client-rate-limit.json create mode 100644 tools/client-subscriptions-management/src/__tests__/entrypoint/cli/targets-set-certificate.test.ts create mode 100644 tools/client-subscriptions-management/src/__tests__/entrypoint/cli/targets-set-mtls.test.ts create mode 100644 tools/client-subscriptions-management/src/__tests__/entrypoint/cli/targets-set-pinning.test.ts create mode 100644 tools/client-subscriptions-management/src/__tests__/fixtures/test-cert.pem create mode 100644 
tools/client-subscriptions-management/src/entrypoint/cli/targets-set-certificate.ts create mode 100644 tools/client-subscriptions-management/src/entrypoint/cli/targets-set-mtls.ts create mode 100644 tools/client-subscriptions-management/src/entrypoint/cli/targets-set-pinning.ts diff --git a/eslint.config.mjs b/eslint.config.mjs index eb59432b..9ea6c3e0 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -28,6 +28,7 @@ export default defineConfig([ "**/test-results", "**/playwright-report*", "eslint.config.mjs", + "**/lua-transform.js", ]), //imports @@ -200,7 +201,7 @@ export default defineConfig([ }, }, { - files: ["**/utils/**", "tests/test-team/**", "tests/performance/helpers/**", "lambdas/**/src/**"], + files: ["**/utils/**", "tests/test-team/**", "tests/performance/helpers/**", "lambdas/**/src/**", "src/**/src/**"], rules: { "import-x/prefer-default-export": 0, }, diff --git a/infrastructure/terraform/components/callbacks/README.md b/infrastructure/terraform/components/callbacks/README.md index b1587725..1a784db2 100644 --- a/infrastructure/terraform/components/callbacks/README.md +++ b/infrastructure/terraform/components/callbacks/README.md @@ -18,6 +18,7 @@ | [component](#input\_component) | The variable encapsulating the name of this component | `string` | `"callbacks"` | no | | [default\_tags](#input\_default\_tags) | A map of default tags to apply to all taggable resources within the component | `map(string)` | `{}` | no | | [deploy\_mock\_clients](#input\_deploy\_mock\_clients) | Flag to deploy mock webhook lambda for integration testing (test/dev environments only) | `bool` | `false` | no | +| [elasticache\_data\_storage\_maximum\_gb](#input\_elasticache\_data\_storage\_maximum\_gb) | Maximum data storage in GB for the ElastiCache Serverless delivery state cache | `number` | `1` | no | | [enable\_event\_anomaly\_detection](#input\_enable\_event\_anomaly\_detection) | Enable CloudWatch anomaly detection alarm for inbound event queue message 
reception | `bool` | `true` | no | | [enable\_xray\_tracing](#input\_enable\_xray\_tracing) | Enable AWS X-Ray active tracing for Lambda functions | `bool` | `false` | no | | [environment](#input\_environment) | The name of the tfscaffold environment | `string` | n/a | yes | @@ -30,6 +31,12 @@ | [log\_level](#input\_log\_level) | The log level to be used in lambda functions within the component. Any log with a lower severity than the configured value will not be logged: https://docs.python.org/3/library/logging.html#levels | `string` | `"INFO"` | no | | [log\_retention\_in\_days](#input\_log\_retention\_in\_days) | The retention period in days for the Cloudwatch Logs events to be retained, default of 0 is indefinite | `number` | `0` | no | | [message\_root\_uri](#input\_message\_root\_uri) | The root URI used for constructing message links in callback payloads | `string` | n/a | yes | +| [mtls\_cert\_secret\_arn](#input\_mtls\_cert\_secret\_arn) | Secrets Manager ARN for the shared mTLS client certificate (production) | `string` | `""` | no | +| [mtls\_mock\_server\_cert\_s3\_key](#input\_mtls\_mock\_server\_cert\_s3\_key) | S3 key for the mock webhook server certificate PEM (signed by the test CA) | `string` | `""` | no | +| [mtls\_mock\_server\_key\_s3\_key](#input\_mtls\_mock\_server\_key\_s3\_key) | S3 key for the mock webhook server private key PEM | `string` | `""` | no | +| [mtls\_test\_ca\_s3\_key](#input\_mtls\_test\_ca\_s3\_key) | S3 key for the test CA certificate PEM bundle used for server verification and the mock webhook server cert chain | `string` | `""` | no | +| [mtls\_test\_cert\_s3\_key](#input\_mtls\_test\_cert\_s3\_key) | S3 key for the test mTLS client certificate bundle (dev) | `string` | `""` | no | +| [mtls\_test\_certs\_s3\_bucket](#input\_mtls\_test\_certs\_s3\_bucket) | S3 bucket containing test mTLS certificate material (dev) | `string` | `""` | no | | [parent\_acct\_environment](#input\_parent\_acct\_environment) | Name of the 
environment responsible for the acct resources used, affects things like DNS zone. Useful for named dev environments | `string` | `"main"` | no | | [pipe\_event\_patterns](#input\_pipe\_event\_patterns) | value | `list(string)` | `[]` | no | | [pipe\_log\_level](#input\_pipe\_log\_level) | Log level for the EventBridge Pipe. | `string` | `"ERROR"` | no | @@ -45,7 +52,7 @@ | Name | Source | Version | |------|--------|---------| | [client\_config\_bucket](#module\_client\_config\_bucket) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-s3bucket.zip | n/a | -| [client\_destination](#module\_client\_destination) | ../../modules/client-destination | n/a | +| [client\_delivery](#module\_client\_delivery) | ../../modules/client-delivery | n/a | | [client\_transform\_filter\_lambda](#module\_client\_transform\_filter\_lambda) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-lambda.zip | n/a | | [kms](#module\_kms) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-kms.zip | n/a | | [mock\_webhook\_lambda](#module\_mock\_webhook\_lambda) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-lambda.zip | n/a | diff --git a/infrastructure/terraform/components/callbacks/cloudwatch_eventbus_main.tf b/infrastructure/terraform/components/callbacks/cloudwatch_eventbus_main.tf index f0ce95c4..2a6e687f 100644 --- a/infrastructure/terraform/components/callbacks/cloudwatch_eventbus_main.tf +++ b/infrastructure/terraform/components/callbacks/cloudwatch_eventbus_main.tf @@ -2,3 +2,9 @@ resource "aws_cloudwatch_event_bus" "main" { name = local.csi kms_key_identifier = module.kms.key_arn } + +resource "aws_cloudwatch_event_archive" "main" { + name = "${local.csi}-archive" + event_source_arn = aws_cloudwatch_event_bus.main.arn + retention_days = 7 +} diff --git 
a/infrastructure/terraform/components/callbacks/cloudwatch_metric_alarm_dlq_depth.tf b/infrastructure/terraform/components/callbacks/cloudwatch_metric_alarm_dlq_depth.tf deleted file mode 100644 index e6ed2d9d..00000000 --- a/infrastructure/terraform/components/callbacks/cloudwatch_metric_alarm_dlq_depth.tf +++ /dev/null @@ -1,31 +0,0 @@ -resource "aws_cloudwatch_metric_alarm" "client_dlq_depth" { - for_each = toset(keys(local.config_targets)) - - alarm_name = "${local.csi}-${each.key}-dlq-depth" - alarm_description = join(" ", [ - "RELIABILITY: Messages are in DLQ for ${each.key}.", - "Failed callback deliveries require operator attention.", - ]) - - comparison_operator = "GreaterThanThreshold" - evaluation_periods = 1 - metric_name = "ApproximateNumberOfMessagesVisible" - namespace = "AWS/SQS" - period = 300 - statistic = "Sum" - threshold = 0 - actions_enabled = true - treat_missing_data = "notBreaching" - - dimensions = { - QueueName = "${local.csi}-${each.key}-dlq-queue" - } - - tags = merge( - local.default_tags, - { - Name = "${local.csi}-${each.key}-dlq-depth" - Client = local.config_targets[each.key].client_id - }, - ) -} diff --git a/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf b/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf new file mode 100644 index 00000000..b1c140f0 --- /dev/null +++ b/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf @@ -0,0 +1,178 @@ +resource "aws_elasticache_serverless_cache" "delivery_state" { + name = "${local.csi}-delivery-state" + engine = "valkey" + major_engine_version = "8" + description = "Per-target rate limiting and circuit breaker state for callback delivery" + + snapshot_retention_limit = 0 + + security_group_ids = [aws_security_group.elasticache_delivery_state.id] + subnet_ids = local.acct.private_subnet_ids + + kms_key_id = module.kms.key_arn + + cache_usage_limits { + data_storage { + maximum = 
var.elasticache_data_storage_maximum_gb + unit = "GB" + } + + ecpu_per_second { + maximum = 1000 + } + } + + tags = merge( + local.default_tags, + { + Name = "${local.csi}-delivery-state" + Description = "Callback delivery rate limiter and circuit breaker state" + }, + ) +} + +resource "aws_security_group" "elasticache_delivery_state" { + name = "${local.csi}-elasticache-delivery-state" + description = "Security group for ElastiCache delivery state cluster" + vpc_id = local.acct.vpc_id + + tags = merge( + local.default_tags, + { + Name = "${local.csi}-elasticache-delivery-state" + }, + ) +} + +resource "aws_vpc_security_group_ingress_rule" "elasticache_from_lambda" { + security_group_id = aws_security_group.elasticache_delivery_state.id + referenced_security_group_id = aws_security_group.https_client_lambda.id + from_port = 6379 + to_port = 6379 + ip_protocol = "tcp" + description = "Allow HTTPS Client Lambda to connect to ElastiCache" + + tags = local.default_tags +} + +resource "aws_security_group" "https_client_lambda" { + name = "${local.csi}-https-client-lambda" + description = "Security group for per-client HTTPS Client Lambda functions" + vpc_id = local.acct.vpc_id + + tags = merge( + local.default_tags, + { + Name = "${local.csi}-https-client-lambda" + }, + ) +} + +resource "aws_vpc_security_group_egress_rule" "lambda_to_elasticache" { + security_group_id = aws_security_group.https_client_lambda.id + referenced_security_group_id = aws_security_group.elasticache_delivery_state.id + from_port = 6379 + to_port = 6379 + ip_protocol = "tcp" + description = "Allow Lambda to connect to ElastiCache" + + tags = local.default_tags +} + +resource "aws_vpc_security_group_egress_rule" "lambda_to_https" { + security_group_id = aws_security_group.https_client_lambda.id + cidr_ipv4 = "0.0.0.0/0" + from_port = 0 + to_port = 65535 + ip_protocol = "tcp" + description = "Allow Lambda outbound TCP for HTTPS webhook delivery (port defined per-client in webhook URL)" + + tags = 
local.default_tags +} + +resource "aws_cloudwatch_metric_alarm" "elasticache_ecpu_utilisation" { + alarm_name = "${local.csi}-elasticache-ecpu-utilisation" + alarm_description = join(" ", [ + "PERFORMANCE: ElastiCache processing units utilisation is high.", + "Consider scaling up or optimising Redis commands.", + ]) + + comparison_operator = "GreaterThanThreshold" + evaluation_periods = 3 + metric_name = "ElastiCacheProcessingUnits" + namespace = "AWS/ElastiCache" + period = 300 + statistic = "Average" + threshold = 80 + actions_enabled = true + treat_missing_data = "notBreaching" + + dimensions = { + CacheClusterId = aws_elasticache_serverless_cache.delivery_state.name + } + + tags = merge( + local.default_tags, + { + Name = "${local.csi}-elasticache-ecpu-utilisation" + }, + ) +} + +resource "aws_cloudwatch_metric_alarm" "elasticache_connections" { + alarm_name = "${local.csi}-elasticache-connections" + alarm_description = join(" ", [ + "RELIABILITY: ElastiCache connection count is high.", + "Review per-client Lambda connection pool sizing.", + ]) + + comparison_operator = "GreaterThanThreshold" + evaluation_periods = 2 + metric_name = "CurrConnections" + namespace = "AWS/ElastiCache" + period = 300 + statistic = "Maximum" + threshold = 500 + actions_enabled = true + treat_missing_data = "notBreaching" + + dimensions = { + CacheClusterId = aws_elasticache_serverless_cache.delivery_state.name + } + + tags = merge( + local.default_tags, + { + Name = "${local.csi}-elasticache-connections" + }, + ) +} + +resource "aws_cloudwatch_metric_alarm" "elasticache_throttled_ops" { + alarm_name = "${local.csi}-elasticache-throttled-ops" + alarm_description = join(" ", [ + "PERFORMANCE: ElastiCache throttled operations detected.", + "Increase ECPU limit or reduce request rate.", + ]) + + comparison_operator = "GreaterThanThreshold" + evaluation_periods = 2 + metric_name = "ThrottledCmds" + namespace = "AWS/ElastiCache" + period = 300 + statistic = "Sum" + threshold = 0 + 
actions_enabled = true + treat_missing_data = "notBreaching" + + dimensions = { + CacheClusterId = aws_elasticache_serverless_cache.delivery_state.name + } + + tags = merge( + local.default_tags, + { + Name = "${local.csi}-elasticache-throttled-ops" + }, + ) +} diff --git a/infrastructure/terraform/components/callbacks/locals.tf b/infrastructure/terraform/components/callbacks/locals.tf index f4707154..d9f06bc3 100644 --- a/infrastructure/terraform/components/callbacks/locals.tf +++ b/infrastructure/terraform/components/callbacks/locals.tf @@ -20,7 +20,7 @@ locals { targets = [ for target in try(client.targets, []) : merge(target, { - invocationEndpoint = "${aws_lambda_function_url.mock_webhook[0].function_url}${target.targetId}" + invocationEndpoint = try(target.mtls.enabled, false) ? "https://${aws_lb.mock_webhook_mtls[0].dns_name}/${target.targetId}" : "http://${aws_lb.mock_webhook_mtls[0].dns_name}/${target.targetId}" apiKey = merge(target.apiKey, { headerValue = random_password.mock_webhook_api_key[0].result }) }) ] @@ -28,39 +28,29 @@ locals { } : local.config_clients - config_targets = merge([ - for client_id, data in local.config_clients : { - for target in try(data.targets, []) : target.targetId => { - client_id = client_id - target_id = target.targetId - invocation_endpoint = var.deploy_mock_clients ? "${aws_lambda_function_url.mock_webhook[0].function_url}${target.targetId}" : target.invocationEndpoint - invocation_rate_limit_per_second = target.invocationRateLimit - http_method = target.invocationMethod - header_name = target.apiKey.headerName - header_value = var.deploy_mock_clients ? random_password.mock_webhook_api_key[0].result : target.apiKey.headerValue - } - } - ]...) 
- - config_subscriptions = merge([ - for client_id, data in local.config_clients : { - for subscription in try(data.subscriptions, []) : subscription.subscriptionId => { - client_id = client_id + client_subscriptions = { + for client_id, data in local.config_clients : + client_id => { + for subscription in try(data.subscriptions, []) : + subscription.subscriptionId => { subscription_id = subscription.subscriptionId target_ids = try(subscription.targetIds, []) } } - ]...) - - subscription_targets = merge([ - for subscription_id, subscription in local.config_subscriptions : { - for target_id in subscription.target_ids : - "${subscription_id}-${target_id}" => { - subscription_id = subscription_id - target_id = target_id + } + + client_subscription_targets = { + for client_id, data in local.config_clients : + client_id => merge([ + for subscription in try(data.subscriptions, []) : { + for target_id in try(subscription.targetIds, []) : + "${subscription.subscriptionId}-${target_id}" => { + subscription_id = subscription.subscriptionId + target_id = target_id + } } - } - ]...) + ]...) 
+ } applications_map_parameter_name = coalesce(var.applications_map_parameter_name, "/${var.project}/${var.environment}/${var.component}/applications-map") } diff --git a/infrastructure/terraform/components/callbacks/module_client_delivery.tf b/infrastructure/terraform/components/callbacks/module_client_delivery.tf new file mode 100644 index 00000000..78c23186 --- /dev/null +++ b/infrastructure/terraform/components/callbacks/module_client_delivery.tf @@ -0,0 +1,46 @@ +module "client_delivery" { + source = "../../modules/client-delivery" + for_each = local.config_clients + + project = var.project + aws_account_id = var.aws_account_id + region = var.region + component = var.component + environment = var.environment + group = var.group + + client_id = each.key + client_bus_name = aws_cloudwatch_event_bus.main.name + kms_key_arn = module.kms.key_arn + + subscriptions = local.client_subscriptions[each.key] + subscription_targets = local.client_subscription_targets[each.key] + + client_config_bucket = module.client_config_bucket.bucket + client_config_bucket_arn = module.client_config_bucket.arn + + applications_map_parameter_name = local.applications_map_parameter_name + + lambda_s3_bucket = local.acct.s3_buckets["lambda_function_artefacts"]["id"] + lambda_code_base_path = local.aws_lambda_functions_dir_path + + force_lambda_code_deploy = var.force_lambda_code_deploy + log_level = var.log_level + log_retention_in_days = var.log_retention_in_days + enable_xray_tracing = var.enable_xray_tracing + + log_destination_arn = local.log_destination_arn + log_subscription_role_arn = local.acct.log_subscription_role_arn + + elasticache_endpoint = aws_elasticache_serverless_cache.delivery_state.endpoint[0].address + elasticache_cache_name = aws_elasticache_serverless_cache.delivery_state.name + elasticache_iam_username = "${var.project}-${var.environment}-${var.component}-elasticache-user" + + mtls_cert_secret_arn = var.mtls_cert_secret_arn + mtls_test_cert_s3_bucket = 
var.mtls_test_certs_s3_bucket + mtls_test_cert_s3_key = var.mtls_test_cert_s3_key # gitleaks:allow + mtls_test_ca_s3_key = var.mtls_test_ca_s3_key # gitleaks:allow + + vpc_subnet_ids = local.acct.private_subnet_ids + lambda_security_group_id = aws_security_group.https_client_lambda.id +} diff --git a/infrastructure/terraform/components/callbacks/module_client_destination.tf b/infrastructure/terraform/components/callbacks/module_client_destination.tf deleted file mode 100644 index 21800e94..00000000 --- a/infrastructure/terraform/components/callbacks/module_client_destination.tf +++ /dev/null @@ -1,17 +0,0 @@ -module "client_destination" { - source = "../../modules/client-destination" - - project = var.project - aws_account_id = var.aws_account_id - region = var.region - component = var.component - environment = var.environment - client_bus_name = aws_cloudwatch_event_bus.main.name - - kms_key_arn = module.kms.key_arn - - targets = local.config_targets - subscriptions = local.config_subscriptions - subscription_targets = local.subscription_targets - -} diff --git a/infrastructure/terraform/components/callbacks/module_mock_webhook_alb_mtls.tf b/infrastructure/terraform/components/callbacks/module_mock_webhook_alb_mtls.tf new file mode 100644 index 00000000..c70189e9 --- /dev/null +++ b/infrastructure/terraform/components/callbacks/module_mock_webhook_alb_mtls.tf @@ -0,0 +1,136 @@ +resource "aws_security_group" "mock_webhook_alb" { + count = var.deploy_mock_clients ? 1 : 0 + name = "${local.csi}-mock-webhook-alb" + description = "Security group for mock webhook ALB mTLS endpoint" + vpc_id = local.acct.vpc_id + + tags = merge( + local.default_tags, + { + Name = "${local.csi}-mock-webhook-alb" + }, + ) +} + +resource "aws_vpc_security_group_ingress_rule" "mock_webhook_alb_https" { + count = var.deploy_mock_clients ? 
1 : 0 + security_group_id = aws_security_group.mock_webhook_alb[0].id + referenced_security_group_id = aws_security_group.https_client_lambda.id + from_port = 443 + to_port = 443 + ip_protocol = "tcp" + description = "Allow HTTPS Client Lambda to reach mock webhook via mTLS" + tags = local.default_tags +} + +resource "aws_vpc_security_group_ingress_rule" "mock_webhook_alb_http" { + count = var.deploy_mock_clients ? 1 : 0 + security_group_id = aws_security_group.mock_webhook_alb[0].id + referenced_security_group_id = aws_security_group.https_client_lambda.id + from_port = 80 + to_port = 80 + ip_protocol = "tcp" + description = "Allow HTTPS Client Lambda to reach mock webhook without mTLS" + tags = local.default_tags +} + +resource "aws_vpc_security_group_egress_rule" "mock_webhook_alb_egress" { + count = var.deploy_mock_clients ? 1 : 0 + security_group_id = aws_security_group.mock_webhook_alb[0].id + ip_protocol = "-1" + cidr_ipv4 = "0.0.0.0/0" + tags = local.default_tags +} + +data "aws_s3_object" "mtls_mock_server_cert" { + count = var.deploy_mock_clients ? 1 : 0 + bucket = var.mtls_test_certs_s3_bucket + key = var.mtls_mock_server_cert_s3_key +} + +data "aws_s3_object" "mtls_mock_server_key" { + count = var.deploy_mock_clients ? 1 : 0 + bucket = var.mtls_test_certs_s3_bucket + key = var.mtls_mock_server_key_s3_key +} + +data "aws_s3_object" "mtls_ca_bundle" { + count = var.deploy_mock_clients ? 1 : 0 + bucket = var.mtls_test_certs_s3_bucket + key = var.mtls_test_ca_s3_key # gitleaks:allow +} + +resource "aws_acm_certificate" "mock_webhook_server" { + count = var.deploy_mock_clients ? 1 : 0 + certificate_body = data.aws_s3_object.mtls_mock_server_cert[0].body + private_key = data.aws_s3_object.mtls_mock_server_key[0].body + certificate_chain = data.aws_s3_object.mtls_ca_bundle[0].body + tags = local.default_tags +} + +resource "aws_lb" "mock_webhook_mtls" { + count = var.deploy_mock_clients ? 
1 : 0 + name = substr("${local.csi}-mock-mtls", 0, 32) + internal = true + load_balancer_type = "application" + security_groups = [aws_security_group.mock_webhook_alb[0].id] + subnets = local.acct.private_subnet_ids + tags = local.default_tags +} + +resource "aws_lb_target_group" "mock_webhook_mtls" { + count = var.deploy_mock_clients ? 1 : 0 + name = substr("${local.csi}-mock-mtls", 0, 32) + target_type = "lambda" + tags = local.default_tags +} + +resource "aws_lambda_permission" "mock_webhook_mtls_alb" { + count = var.deploy_mock_clients ? 1 : 0 + statement_id = "AllowMtlsAlb" + action = "lambda:InvokeFunction" + function_name = module.mock_webhook_lambda[0].function_name + principal = "elasticloadbalancing.amazonaws.com" + source_arn = aws_lb_target_group.mock_webhook_mtls[0].arn +} + +resource "aws_lb_target_group_attachment" "mock_webhook_mtls" { + count = var.deploy_mock_clients ? 1 : 0 + target_group_arn = aws_lb_target_group.mock_webhook_mtls[0].arn + target_id = module.mock_webhook_lambda[0].function_arn + depends_on = [aws_lambda_permission.mock_webhook_mtls_alb] +} + +resource "aws_lb_listener" "mock_webhook_mtls" { + count = var.deploy_mock_clients ? 1 : 0 + load_balancer_arn = aws_lb.mock_webhook_mtls[0].arn + port = 443 + protocol = "HTTPS" + ssl_policy = "ELBSecurityPolicy-TLS13-1-2-2021-06" + certificate_arn = aws_acm_certificate.mock_webhook_server[0].arn + + mutual_authentication { + mode = "passthrough" + } + + default_action { + type = "forward" + target_group_arn = aws_lb_target_group.mock_webhook_mtls[0].arn + } + + tags = local.default_tags +} + +resource "aws_lb_listener" "mock_webhook_http" { + count = var.deploy_mock_clients ? 
1 : 0 + load_balancer_arn = aws_lb.mock_webhook_mtls[0].arn + port = 80 + protocol = "HTTP" + + default_action { + type = "forward" + target_group_arn = aws_lb_target_group.mock_webhook_mtls[0].arn + } + + tags = local.default_tags +} diff --git a/infrastructure/terraform/components/callbacks/module_mock_webhook_lambda.tf b/infrastructure/terraform/components/callbacks/module_mock_webhook_lambda.tf index b951351e..467dc1c6 100644 --- a/infrastructure/terraform/components/callbacks/module_mock_webhook_lambda.tf +++ b/infrastructure/terraform/components/callbacks/module_mock_webhook_lambda.tf @@ -64,34 +64,3 @@ data "aws_iam_policy_document" "mock_webhook_lambda" { ] } } - -# Lambda Function URL for mock webhook (test/dev only) -resource "aws_lambda_function_url" "mock_webhook" { - count = var.deploy_mock_clients ? 1 : 0 - function_name = module.mock_webhook_lambda[0].function_name - authorization_type = "NONE" # Public endpoint for testing - - cors { - allow_origins = ["*"] - allow_methods = ["POST"] - allow_headers = ["*"] - max_age = 86400 - } -} - -resource "aws_lambda_permission" "mock_webhook_function_url" { - count = var.deploy_mock_clients ? 1 : 0 - statement_id_prefix = "FunctionURLAllowPublicAccess" - action = "lambda:InvokeFunctionUrl" - function_name = module.mock_webhook_lambda[0].function_name - principal = "*" - function_url_auth_type = "NONE" -} - -resource "aws_lambda_permission" "mock_webhook_function_invoke" { - count = var.deploy_mock_clients ? 
1 : 0 - statement_id_prefix = "FunctionURLAllowInvokeAction" - action = "lambda:InvokeFunction" - function_name = module.mock_webhook_lambda[0].function_name - principal = "*" -} diff --git a/infrastructure/terraform/components/callbacks/module_transform_filter_lambda.tf b/infrastructure/terraform/components/callbacks/module_transform_filter_lambda.tf index fb1313f8..e7e69eb1 100644 --- a/infrastructure/terraform/components/callbacks/module_transform_filter_lambda.tf +++ b/infrastructure/terraform/components/callbacks/module_transform_filter_lambda.tf @@ -42,7 +42,6 @@ module "client_transform_filter_lambda" { CLIENT_SUBSCRIPTION_CONFIG_PREFIX = "client_subscriptions/" CLIENT_SUBSCRIPTION_CACHE_TTL_SECONDS = "60" MESSAGE_ROOT_URI = var.message_root_uri - APPLICATIONS_MAP_PARAMETER = local.applications_map_parameter_name } } @@ -87,19 +86,6 @@ data "aws_iam_policy_document" "client_transform_filter_lambda" { ] } - statement { - sid = "SSMApplicationsMapRead" - effect = "Allow" - - actions = [ - "ssm:GetParameter", - ] - - resources = [ - "arn:aws:ssm:${var.region}:${var.aws_account_id}:parameter${local.applications_map_parameter_name}", - ] - } - statement { sid = "CloudWatchMetrics" effect = "Allow" diff --git a/infrastructure/terraform/components/callbacks/pipes_pipe_main.tf b/infrastructure/terraform/components/callbacks/pipes_pipe_main.tf index 3fddfcca..ae914f4f 100644 --- a/infrastructure/terraform/components/callbacks/pipes_pipe_main.tf +++ b/infrastructure/terraform/components/callbacks/pipes_pipe_main.tf @@ -26,8 +26,7 @@ resource "aws_pipes_pipe" "main" { input_template = <, - "subscriptions": <$.subscriptions>, - "signatures": <$.signatures> + "subscriptions": <$.subscriptions> } EOF } diff --git a/infrastructure/terraform/components/callbacks/variables.tf b/infrastructure/terraform/components/callbacks/variables.tf index 74a72d24..cb32e8e3 100644 --- a/infrastructure/terraform/components/callbacks/variables.tf +++ 
b/infrastructure/terraform/components/callbacks/variables.tf @@ -177,3 +177,45 @@ variable "s3_enable_force_destroy" { description = "Whether to enable force destroy for the S3 buckets created in this module" default = false } + +variable "mtls_cert_secret_arn" { + type = string + description = "Secrets Manager ARN for the shared mTLS client certificate (production)" + default = "" +} + +variable "mtls_test_certs_s3_bucket" { + type = string + description = "S3 bucket containing test mTLS certificate material (dev)" + default = "" +} + +variable "mtls_test_cert_s3_key" { + type = string + description = "S3 key for the test mTLS client certificate bundle (dev)" + default = "" +} + +variable "mtls_test_ca_s3_key" { + type = string + description = "S3 key for the test CA certificate PEM bundle used for server verification and the mock webhook server cert chain" + default = "" +} + +variable "mtls_mock_server_cert_s3_key" { + type = string + description = "S3 key for the mock webhook server certificate PEM (signed by the test CA)" + default = "" +} + +variable "mtls_mock_server_key_s3_key" { + type = string + description = "S3 key for the mock webhook server private key PEM" + default = "" +} + +variable "elasticache_data_storage_maximum_gb" { + type = number + description = "Maximum data storage in GB for the ElastiCache Serverless delivery state cache" + default = 1 +} diff --git a/infrastructure/terraform/modules/client-delivery/README.md b/infrastructure/terraform/modules/client-delivery/README.md new file mode 100644 index 00000000..12dc05cc --- /dev/null +++ b/infrastructure/terraform/modules/client-delivery/README.md @@ -0,0 +1,69 @@ + + + + +## Requirements + +No requirements. 
+## Inputs + +| Name | Description | Type | Default | Required | +|------|-------------|------|---------|:--------:| +| [applications\_map\_parameter\_name](#input\_applications\_map\_parameter\_name) | SSM Parameter Store path for the clientId-to-applicationData map | `string` | n/a | yes | +| [aws\_account\_id](#input\_aws\_account\_id) | Account ID | `string` | n/a | yes | +| [client\_bus\_name](#input\_client\_bus\_name) | EventBridge bus name for subscription rules | `string` | n/a | yes | +| [client\_config\_bucket](#input\_client\_config\_bucket) | S3 bucket name containing client subscription configuration | `string` | n/a | yes | +| [client\_config\_bucket\_arn](#input\_client\_config\_bucket\_arn) | S3 bucket ARN containing client subscription configuration | `string` | n/a | yes | +| [client\_id](#input\_client\_id) | Unique identifier for this client | `string` | n/a | yes | +| [component](#input\_component) | Component name | `string` | n/a | yes | +| [elasticache\_cache\_name](#input\_elasticache\_cache\_name) | ElastiCache cache name for SigV4 token presigning | `string` | `""` | no | +| [elasticache\_endpoint](#input\_elasticache\_endpoint) | ElastiCache Serverless endpoint URL | `string` | `""` | no | +| [elasticache\_iam\_username](#input\_elasticache\_iam\_username) | IAM username for ElastiCache authentication | `string` | `""` | no | +| [enable\_xray\_tracing](#input\_enable\_xray\_tracing) | Enable AWS X-Ray active tracing for the Lambda function | `bool` | `false` | no | +| [environment](#input\_environment) | The name of the tfscaffold environment | `string` | n/a | yes | +| [force\_lambda\_code\_deploy](#input\_force\_lambda\_code\_deploy) | Force Lambda code redeployment even when commit tag matches | `bool` | `false` | no | +| [group](#input\_group) | The name of the tfscaffold group | `string` | `null` | no | +| [kms\_key\_arn](#input\_kms\_key\_arn) | KMS Key ARN for encryption at rest | `string` | n/a | yes | +| 
[lambda\_batch\_size](#input\_lambda\_batch\_size) | Number of SQS messages per Lambda invocation | `number` | `10` | no | +| [lambda\_code\_base\_path](#input\_lambda\_code\_base\_path) | Base path to Lambda source code directories | `string` | n/a | yes | +| [lambda\_memory](#input\_lambda\_memory) | Lambda memory allocation in MB | `number` | `256` | no | +| [lambda\_s3\_bucket](#input\_lambda\_s3\_bucket) | S3 bucket for Lambda function artefacts | `string` | n/a | yes | +| [lambda\_security\_group\_id](#input\_lambda\_security\_group\_id) | Security group ID for the Lambda function | `string` | `""` | no | +| [lambda\_timeout](#input\_lambda\_timeout) | Lambda timeout in seconds | `number` | `30` | no | +| [log\_destination\_arn](#input\_log\_destination\_arn) | Firehose destination ARN for log forwarding | `string` | `""` | no | +| [log\_level](#input\_log\_level) | Log level for the Lambda function | `string` | `"INFO"` | no | +| [log\_retention\_in\_days](#input\_log\_retention\_in\_days) | CloudWatch log retention period in days | `number` | `0` | no | +| [log\_subscription\_role\_arn](#input\_log\_subscription\_role\_arn) | IAM role ARN for CloudWatch log subscription | `string` | `""` | no | +| [max\_retry\_duration\_seconds](#input\_max\_retry\_duration\_seconds) | Maximum retry window before messages are sent to DLQ | `number` | `7200` | no | +| [mtls\_cert\_secret\_arn](#input\_mtls\_cert\_secret\_arn) | Secrets Manager ARN for the mTLS client certificate | `string` | `""` | no | +| [mtls\_test\_ca\_s3\_key](#input\_mtls\_test\_ca\_s3\_key) | S3 key for dev CA certificate PEM bundle used for server verification | `string` | `""` | no | +| [mtls\_test\_cert\_s3\_bucket](#input\_mtls\_test\_cert\_s3\_bucket) | S3 bucket for dev mTLS test certificates | `string` | `""` | no | +| [mtls\_test\_cert\_s3\_key](#input\_mtls\_test\_cert\_s3\_key) | S3 key for dev mTLS test certificate bundle | `string` | `""` | no | +| [project](#input\_project) | The name of 
the tfscaffold project | `string` | n/a | yes | +| [region](#input\_region) | AWS Region | `string` | n/a | yes | +| [sqs\_max\_receive\_count](#input\_sqs\_max\_receive\_count) | Maximum receive count before message moves to DLQ | `number` | `100` | no | +| [sqs\_visibility\_timeout\_seconds](#input\_sqs\_visibility\_timeout\_seconds) | Visibility timeout for the per-client delivery queue | `number` | `60` | no | +| [subscription\_targets](#input\_subscription\_targets) | Flattened subscription-target fanout map keyed by subscription-target composite key |
map(object({
subscription_id = string
target_id = string
}))
| n/a | yes | +| [subscriptions](#input\_subscriptions) | Subscription definitions for this client, keyed by subscription\_id |
map(object({
subscription_id = string
target_ids = list(string)
}))
| n/a | yes | +| [vpc\_subnet\_ids](#input\_vpc\_subnet\_ids) | VPC subnet IDs for Lambda execution | `list(string)` | `[]` | no | +## Modules + +| Name | Source | Version | +|------|--------|---------| +| [dlq\_delivery](#module\_dlq\_delivery) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-sqs.zip | n/a | +| [https\_client\_lambda](#module\_https\_client\_lambda) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-lambda.zip | n/a | +| [sqs\_delivery](#module\_sqs\_delivery) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.9/terraform-sqs.zip | n/a | +## Outputs + +| Name | Description | +|------|-------------| +| [delivery\_queue\_arn](#output\_delivery\_queue\_arn) | ARN of the per-client delivery SQS queue | +| [delivery\_queue\_url](#output\_delivery\_queue\_url) | URL of the per-client delivery SQS queue | +| [dlq\_arn](#output\_dlq\_arn) | ARN of the per-client delivery DLQ | +| [dlq\_url](#output\_dlq\_url) | URL of the per-client delivery DLQ | +| [lambda\_execution\_role\_arn](#output\_lambda\_execution\_role\_arn) | ARN of the Lambda execution IAM role | +| [lambda\_function\_arn](#output\_lambda\_function\_arn) | ARN of the per-client HTTPS Client Lambda function | +| [lambda\_function\_name](#output\_lambda\_function\_name) | Name of the per-client HTTPS Client Lambda function | + + + diff --git a/infrastructure/terraform/modules/client-delivery/cloudwatch_event_rule_per_subscription.tf b/infrastructure/terraform/modules/client-delivery/cloudwatch_event_rule_per_subscription.tf new file mode 100644 index 00000000..fa2e02e9 --- /dev/null +++ b/infrastructure/terraform/modules/client-delivery/cloudwatch_event_rule_per_subscription.tf @@ -0,0 +1,101 @@ +resource "aws_cloudwatch_event_rule" "per_subscription" { + for_each = var.subscriptions + + name = "${local.client_prefix}-${each.key}" + description = "Client Callbacks event rule for 
client ${var.client_id} subscription ${each.key}" + event_bus_name = var.client_bus_name + + event_pattern = jsonencode({ + "detail" : { + "subscriptions" : [each.value.subscription_id] + } + }) + + tags = local.default_tags +} + +resource "aws_cloudwatch_event_target" "per_subscription_target" { + for_each = var.subscription_targets + + rule = aws_cloudwatch_event_rule.per_subscription[each.value.subscription_id].name + arn = module.sqs_delivery.sqs_queue_arn + target_id = "${local.client_prefix}-${each.value.target_id}" + event_bus_name = var.client_bus_name + role_arn = aws_iam_role.eventbridge_sqs_target.arn + + sqs_target { + message_group_id = null + } + + input_transformer { + input_paths = { + payload = "$.detail.payload" + } + + input_template = "{\"payload\": , \"subscriptionId\": \"${each.value.subscription_id}\", \"targetId\": \"${each.value.target_id}\"}" + } + + dead_letter_config { + arn = module.dlq_delivery.sqs_queue_arn + } + + retry_policy { + maximum_retry_attempts = 0 + maximum_event_age_in_seconds = 60 + } +} + +resource "aws_iam_role" "eventbridge_sqs_target" { + name = "${local.client_prefix}-eb-sqs-role" + description = "Role for EventBridge to send messages to per-client SQS queue" + assume_role_policy = data.aws_iam_policy_document.eventbridge_sqs_assume.json + + tags = local.default_tags +} + +data "aws_iam_policy_document" "eventbridge_sqs_assume" { + statement { + actions = ["sts:AssumeRole"] + + principals { + type = "Service" + identifiers = ["events.amazonaws.com"] + } + } +} + +resource "aws_iam_role_policy" "eventbridge_sqs_send" { + name = "sqs-send" + role = aws_iam_role.eventbridge_sqs_target.id + policy = data.aws_iam_policy_document.eventbridge_sqs_send.json +} + +data "aws_iam_policy_document" "eventbridge_sqs_send" { + statement { + sid = "AllowSQSSendMessage" + effect = "Allow" + + actions = [ + "sqs:SendMessage", + ] + + resources = [ + module.sqs_delivery.sqs_queue_arn, + module.dlq_delivery.sqs_queue_arn, + ] + } + + 
statement { + sid = "AllowKMSForSQS" + effect = "Allow" + + actions = [ + "kms:Decrypt", + "kms:GenerateDataKey", + ] + + resources = [ + var.kms_key_arn, + ] + } +} diff --git a/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf b/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf new file mode 100644 index 00000000..9d2348a0 --- /dev/null +++ b/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf @@ -0,0 +1,115 @@ +data "aws_iam_policy_document" "https_client_lambda" { + statement { + sid = "KMSPermissions" + effect = "Allow" + + actions = [ + "kms:Decrypt", + "kms:GenerateDataKey", + ] + + resources = [ + var.kms_key_arn, + ] + } + + statement { + sid = "SQSDeliveryQueueConsume" + effect = "Allow" + + actions = [ + "sqs:ReceiveMessage", + "sqs:DeleteMessage", + "sqs:GetQueueAttributes", + "sqs:ChangeMessageVisibility", + ] + + resources = [ + module.sqs_delivery.sqs_queue_arn, + ] + } + + statement { + sid = "SQSDLQSend" + effect = "Allow" + + actions = [ + "sqs:SendMessage", + ] + + resources = [ + module.dlq_delivery.sqs_queue_arn, + ] + } + + statement { + sid = "SSMGetApplicationsMap" + effect = "Allow" + + actions = [ + "ssm:GetParameter", + ] + + resources = [ + "arn:aws:ssm:${var.region}:${var.aws_account_id}:parameter${var.applications_map_parameter_name}", + ] + } + + statement { + sid = "S3ClientConfigReadAccess" + effect = "Allow" + + actions = [ + "s3:GetObject", + ] + + resources = [ + "${var.client_config_bucket_arn}/client_subscriptions/*", + ] + } + + statement { + sid = "S3ClientConfigListAccess" + effect = "Allow" + + actions = [ + "s3:ListBucket", + ] + + resources = [ + var.client_config_bucket_arn, + ] + } + + dynamic "statement" { + for_each = var.mtls_cert_secret_arn != "" ? 
[1] : [] + content { + sid = "SecretsManagerMTLSCert" + effect = "Allow" + + actions = [ + "secretsmanager:GetSecretValue", + ] + + resources = [ + var.mtls_cert_secret_arn, + ] + } + } + + dynamic "statement" { + for_each = var.mtls_test_cert_s3_bucket != "" ? [1] : [] + content { + sid = "S3MTLSTestCertReadAccess" + effect = "Allow" + + actions = [ + "s3:GetObject", + ] + + resources = [ + "arn:aws:s3:::${var.mtls_test_cert_s3_bucket}/${var.mtls_test_cert_s3_key}", + ] + } + } +} diff --git a/infrastructure/terraform/modules/client-delivery/locals.tf b/infrastructure/terraform/modules/client-delivery/locals.tf new file mode 100644 index 00000000..6ca35137 --- /dev/null +++ b/infrastructure/terraform/modules/client-delivery/locals.tf @@ -0,0 +1,21 @@ +locals { + csi = replace( + format( + "%s-%s-%s", + var.project, + var.environment, + var.component, + ), + "_", + "", + ) + + client_prefix = "${local.csi}-${var.client_id}" + + default_tags = { + Project = var.project + Environment = var.environment + Component = var.component + Client = var.client_id + } +} diff --git a/infrastructure/terraform/modules/client-delivery/module_dlq_per_client.tf b/infrastructure/terraform/modules/client-delivery/module_dlq_per_client.tf new file mode 100644 index 00000000..84c410dd --- /dev/null +++ b/infrastructure/terraform/modules/client-delivery/module_dlq_per_client.tf @@ -0,0 +1,43 @@ +module "dlq_delivery" { + source = "https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-sqs.zip" + + aws_account_id = var.aws_account_id + component = var.component + environment = var.environment + project = var.project + region = var.region + name = "${var.client_id}-delivery-dlq" + + sqs_kms_key_arn = var.kms_key_arn + + create_dlq = false +} + +resource "aws_cloudwatch_metric_alarm" "dlq_depth" { + alarm_name = "${local.client_prefix}-dlq-depth" + alarm_description = join(" ", [ + "RELIABILITY: Messages are in DLQ for client ${var.client_id}.", + "Failed 
callback deliveries require operator attention.", + ]) + + comparison_operator = "GreaterThanThreshold" + evaluation_periods = 1 + metric_name = "ApproximateNumberOfMessagesVisible" + namespace = "AWS/SQS" + period = 300 + statistic = "Sum" + threshold = 0 + actions_enabled = true + treat_missing_data = "notBreaching" + + dimensions = { + QueueName = "${local.client_prefix}-delivery-dlq-queue" + } + + tags = merge( + local.default_tags, + { + Name = "${local.client_prefix}-dlq-depth" + }, + ) +} diff --git a/infrastructure/terraform/modules/client-delivery/module_https_client_lambda.tf b/infrastructure/terraform/modules/client-delivery/module_https_client_lambda.tf new file mode 100644 index 00000000..1260d471 --- /dev/null +++ b/infrastructure/terraform/modules/client-delivery/module_https_client_lambda.tf @@ -0,0 +1,71 @@ +module "https_client_lambda" { + source = "https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-lambda.zip" + + function_name = "https-client-${var.client_id}" + description = "HTTPS delivery Lambda for client ${var.client_id}" + + aws_account_id = var.aws_account_id + component = var.component + environment = var.environment + project = var.project + region = var.region + group = var.group + + log_retention_in_days = var.log_retention_in_days + kms_key_arn = var.kms_key_arn + + iam_policy_document = { + body = data.aws_iam_policy_document.https_client_lambda.json + } + + function_s3_bucket = var.lambda_s3_bucket + function_code_base_path = var.lambda_code_base_path + function_code_dir = "https-client-lambda/dist" + function_include_common = true + handler_function_name = "handler" + runtime = "nodejs22.x" + memory = var.lambda_memory + timeout = var.lambda_timeout + log_level = var.log_level + + force_lambda_code_deploy = var.force_lambda_code_deploy + enable_lambda_insights = false + enable_xray_tracing = var.enable_xray_tracing + + log_destination_arn = var.log_destination_arn + log_subscription_role_arn 
= var.log_subscription_role_arn + + lambda_env_vars = { + APPLICATIONS_MAP_PARAMETER = var.applications_map_parameter_name + CLIENT_ID = var.client_id + CLIENT_SUBSCRIPTION_CACHE_TTL_SECONDS = "60" + CLIENT_SUBSCRIPTION_CONFIG_BUCKET = var.client_config_bucket + CLIENT_SUBSCRIPTION_CONFIG_PREFIX = "client_subscriptions/" + DLQ_URL = module.dlq_delivery.sqs_queue_url + ELASTICACHE_CACHE_NAME = var.elasticache_cache_name + ELASTICACHE_ENDPOINT = var.elasticache_endpoint + ELASTICACHE_IAM_USERNAME = var.elasticache_iam_username + ENVIRONMENT = var.environment + MAX_RETRY_DURATION_SECONDS = tostring(var.max_retry_duration_seconds) + METRICS_NAMESPACE = "nhs-notify-client-callbacks" + MTLS_CERT_SECRET_ARN = var.mtls_cert_secret_arn + MTLS_TEST_CA_S3_KEY = var.mtls_test_ca_s3_key # gitleaks:allow + MTLS_TEST_CERT_S3_BUCKET = var.mtls_test_cert_s3_bucket + MTLS_TEST_CERT_S3_KEY = var.mtls_test_cert_s3_key # gitleaks:allow + QUEUE_URL = module.sqs_delivery.sqs_queue_url + } + + vpc_config = var.lambda_security_group_id != "" ? 
{ + subnet_ids = var.vpc_subnet_ids + security_group_ids = [var.lambda_security_group_id] + } : null +} + +resource "aws_lambda_event_source_mapping" "sqs_delivery" { + event_source_arn = module.sqs_delivery.sqs_queue_arn + function_name = module.https_client_lambda.function_arn + batch_size = var.lambda_batch_size + enabled = true + + function_response_types = ["ReportBatchItemFailures"] +} diff --git a/infrastructure/terraform/modules/client-delivery/module_sqs_per_client.tf b/infrastructure/terraform/modules/client-delivery/module_sqs_per_client.tf new file mode 100644 index 00000000..5811f8b6 --- /dev/null +++ b/infrastructure/terraform/modules/client-delivery/module_sqs_per_client.tf @@ -0,0 +1,39 @@ +module "sqs_delivery" { + source = "https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.9/terraform-sqs.zip" + + aws_account_id = var.aws_account_id + component = var.component + environment = var.environment + project = var.project + region = var.region + name = "${var.client_id}-delivery" + + sqs_kms_key_arn = var.kms_key_arn + + visibility_timeout_seconds = var.sqs_visibility_timeout_seconds + max_receive_count = var.sqs_max_receive_count + + create_dlq = false + + sqs_policy_overload = data.aws_iam_policy_document.sqs_delivery.json +} + +data "aws_iam_policy_document" "sqs_delivery" { + statement { + sid = "AllowEventBridgeToSendMessage" + effect = "Allow" + + principals { + type = "Service" + identifiers = ["events.amazonaws.com"] + } + + actions = [ + "sqs:SendMessage", + ] + + resources = [ + "arn:aws:sqs:${var.region}:${var.aws_account_id}:${local.csi}-${var.client_id}-delivery-queue", + ] + } +} diff --git a/infrastructure/terraform/modules/client-delivery/outputs.tf b/infrastructure/terraform/modules/client-delivery/outputs.tf new file mode 100644 index 00000000..727ae19d --- /dev/null +++ b/infrastructure/terraform/modules/client-delivery/outputs.tf @@ -0,0 +1,34 @@ +output "delivery_queue_arn" { + description = "ARN of the 
per-client delivery SQS queue" + value = module.sqs_delivery.sqs_queue_arn +} + +output "delivery_queue_url" { + description = "URL of the per-client delivery SQS queue" + value = module.sqs_delivery.sqs_queue_url +} + +output "dlq_arn" { + description = "ARN of the per-client delivery DLQ" + value = module.dlq_delivery.sqs_queue_arn +} + +output "dlq_url" { + description = "URL of the per-client delivery DLQ" + value = module.dlq_delivery.sqs_queue_url +} + +output "lambda_function_name" { + description = "Name of the per-client HTTPS Client Lambda function" + value = module.https_client_lambda.function_name +} + +output "lambda_function_arn" { + description = "ARN of the per-client HTTPS Client Lambda function" + value = module.https_client_lambda.function_arn +} + +output "lambda_execution_role_arn" { + description = "ARN of the Lambda execution IAM role" + value = module.https_client_lambda.iam_role_arn +} diff --git a/infrastructure/terraform/modules/client-delivery/variables.tf b/infrastructure/terraform/modules/client-delivery/variables.tf new file mode 100644 index 00000000..aabdcda6 --- /dev/null +++ b/infrastructure/terraform/modules/client-delivery/variables.tf @@ -0,0 +1,212 @@ +variable "project" { + type = string + description = "The name of the tfscaffold project" +} + +variable "environment" { + type = string + description = "The name of the tfscaffold environment" +} + +variable "component" { + type = string + description = "Component name" +} + +variable "aws_account_id" { + type = string + description = "Account ID" +} + +variable "region" { + type = string + description = "AWS Region" +} + +variable "group" { + type = string + description = "The name of the tfscaffold group" + default = null +} + +variable "client_id" { + type = string + description = "Unique identifier for this client" +} + +variable "kms_key_arn" { + type = string + description = "KMS Key ARN for encryption at rest" +} + +variable "client_bus_name" { + type = string + 
description = "EventBridge bus name for subscription rules" +} + +variable "subscriptions" { + type = map(object({ + subscription_id = string + target_ids = list(string) + })) + description = "Subscription definitions for this client, keyed by subscription_id" +} + +variable "subscription_targets" { + type = map(object({ + subscription_id = string + target_id = string + })) + description = "Flattened subscription-target fanout map keyed by subscription-target composite key" +} + +variable "client_config_bucket" { + type = string + description = "S3 bucket name containing client subscription configuration" +} + +variable "client_config_bucket_arn" { + type = string + description = "S3 bucket ARN containing client subscription configuration" +} + +variable "applications_map_parameter_name" { + type = string + description = "SSM Parameter Store path for the clientId-to-applicationData map" +} + +variable "lambda_s3_bucket" { + type = string + description = "S3 bucket for Lambda function artefacts" +} + +variable "lambda_code_base_path" { + type = string + description = "Base path to Lambda source code directories" +} + +variable "force_lambda_code_deploy" { + type = bool + description = "Force Lambda code redeployment even when commit tag matches" + default = false +} + +variable "log_level" { + type = string + description = "Log level for the Lambda function" + default = "INFO" +} + +variable "log_retention_in_days" { + type = number + description = "CloudWatch log retention period in days" + default = 0 +} + +variable "log_destination_arn" { + type = string + description = "Firehose destination ARN for log forwarding" + default = "" +} + +variable "log_subscription_role_arn" { + type = string + description = "IAM role ARN for CloudWatch log subscription" + default = "" +} + +variable "lambda_batch_size" { + type = number + description = "Number of SQS messages per Lambda invocation" + default = 10 +} + +variable "lambda_memory" { + type = number + description = 
"Lambda memory allocation in MB" + default = 256 +} + +variable "lambda_timeout" { + type = number + description = "Lambda timeout in seconds" + default = 30 +} + +variable "max_retry_duration_seconds" { + type = number + description = "Maximum retry window before messages are sent to DLQ" + default = 7200 +} + +variable "sqs_visibility_timeout_seconds" { + type = number + description = "Visibility timeout for the per-client delivery queue" + default = 60 +} + +variable "sqs_max_receive_count" { + type = number + description = "Maximum receive count before message moves to DLQ" + default = 100 +} + +variable "enable_xray_tracing" { + type = bool + description = "Enable AWS X-Ray active tracing for the Lambda function" + default = false +} + +variable "mtls_cert_secret_arn" { + type = string + description = "Secrets Manager ARN for the mTLS client certificate" + default = "" +} + +variable "mtls_test_cert_s3_bucket" { + type = string + description = "S3 bucket for dev mTLS test certificates" + default = "" +} + +variable "mtls_test_cert_s3_key" { + type = string + description = "S3 key for dev mTLS test certificate bundle" + default = "" +} + +variable "mtls_test_ca_s3_key" { + type = string + description = "S3 key for dev CA certificate PEM bundle used for server verification" + default = "" +} + +variable "elasticache_endpoint" { + type = string + description = "ElastiCache Serverless endpoint URL" + default = "" +} + +variable "elasticache_cache_name" { + type = string + description = "ElastiCache cache name for SigV4 token presigning" + default = "" +} + +variable "elasticache_iam_username" { + type = string + description = "IAM username for ElastiCache authentication" + default = "" +} + +variable "vpc_subnet_ids" { + type = list(string) + description = "VPC subnet IDs for Lambda execution" + default = [] +} + +variable "lambda_security_group_id" { + type = string + description = "Security group ID for the Lambda function" + default = "" +} diff --git 
a/infrastructure/terraform/modules/client-destination/README.md b/infrastructure/terraform/modules/client-destination/README.md deleted file mode 100644 index 11b689c3..00000000 --- a/infrastructure/terraform/modules/client-destination/README.md +++ /dev/null @@ -1,32 +0,0 @@ - - - - -## Requirements - -No requirements. -## Inputs - -| Name | Description | Type | Default | Required | -|------|-------------|------|---------|:--------:| -| [aws\_account\_id](#input\_aws\_account\_id) | Account ID | `string` | n/a | yes | -| [client\_bus\_name](#input\_client\_bus\_name) | EventBus name where you create the rule | `string` | n/a | yes | -| [component](#input\_component) | Component name | `string` | n/a | yes | -| [environment](#input\_environment) | The name of the tfscaffold environment | `string` | n/a | yes | -| [kms\_key\_arn](#input\_kms\_key\_arn) | KMS Key ARN | `string` | n/a | yes | -| [project](#input\_project) | The name of the tfscaffold project | `string` | n/a | yes | -| [region](#input\_region) | AWS Region | `string` | n/a | yes | -| [subscription\_targets](#input\_subscription\_targets) | Flattened subscription-target fanout map keyed by subscription-target composite key |
map(object({
subscription_id = string
target_id = string
}))
| n/a | yes | -| [subscriptions](#input\_subscriptions) | Flattened subscription definitions keyed by subscription\_id |
map(object({
client_id = string
subscription_id = string
target_ids = list(string)
}))
| n/a | yes | -| [targets](#input\_targets) | Flattened target definitions keyed by target\_id |
map(object({
client_id = string
target_id = string
invocation_endpoint = string
invocation_rate_limit_per_second = number
http_method = string
header_name = string
header_value = string
}))
| n/a | yes | -## Modules - -| Name | Source | Version | -|------|--------|---------| -| [target\_dlq](#module\_target\_dlq) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-sqs.zip | n/a | -## Outputs - -No outputs. - - - diff --git a/infrastructure/terraform/modules/client-destination/cloudwatch_event_api_destination_this.tf b/infrastructure/terraform/modules/client-destination/cloudwatch_event_api_destination_this.tf deleted file mode 100644 index 4bec92cc..00000000 --- a/infrastructure/terraform/modules/client-destination/cloudwatch_event_api_destination_this.tf +++ /dev/null @@ -1,10 +0,0 @@ -resource "aws_cloudwatch_event_api_destination" "per_target" { - for_each = var.targets - - name = "${local.csi}-${each.key}" - description = "API Destination for ${each.key}" - invocation_endpoint = each.value.invocation_endpoint - http_method = each.value.http_method - invocation_rate_limit_per_second = each.value.invocation_rate_limit_per_second - connection_arn = aws_cloudwatch_event_connection.per_target[each.key].arn -} diff --git a/infrastructure/terraform/modules/client-destination/cloudwatch_event_connection_main.tf b/infrastructure/terraform/modules/client-destination/cloudwatch_event_connection_main.tf deleted file mode 100644 index 7546d666..00000000 --- a/infrastructure/terraform/modules/client-destination/cloudwatch_event_connection_main.tf +++ /dev/null @@ -1,14 +0,0 @@ -resource "aws_cloudwatch_event_connection" "per_target" { - for_each = var.targets - - name = "${local.csi}-${each.key}" - description = "Event Connection which would be used by API Destination ${each.key}" - authorization_type = "API_KEY" - - auth_parameters { - api_key { - key = each.value.header_name - value = each.value.header_value - } - } -} diff --git a/infrastructure/terraform/modules/client-destination/cloudwatch_event_rule_main.tf b/infrastructure/terraform/modules/client-destination/cloudwatch_event_rule_main.tf deleted file mode 100644 
index bdf7ea47..00000000 --- a/infrastructure/terraform/modules/client-destination/cloudwatch_event_rule_main.tf +++ /dev/null @@ -1,46 +0,0 @@ -resource "aws_cloudwatch_event_rule" "per_subscription" { - for_each = var.subscriptions - - name = "${local.csi}-${each.key}" - description = "Client Callbacks event rule for subscription ${each.key}" - event_bus_name = var.client_bus_name - - event_pattern = jsonencode({ - "detail" : { - "subscriptions" : [each.value.subscription_id] - } - }) -} - -resource "aws_cloudwatch_event_target" "per_subscription_target" { - for_each = var.subscription_targets - - rule = aws_cloudwatch_event_rule.per_subscription[each.value.subscription_id].name - arn = aws_cloudwatch_event_api_destination.per_target[each.value.target_id].arn - target_id = "${local.csi}-${each.value.target_id}" - role_arn = aws_iam_role.api_target_role.arn - event_bus_name = var.client_bus_name - - dead_letter_config { - arn = module.target_dlq[each.value.target_id].sqs_queue_arn - } - - input_transformer { - input_paths = { - data = "$.detail.payload.data" - } - - input_template = "{\"data\": }" - } - - http_target { - header_parameters = { - "x-hmac-sha256-signature" = "$.detail.signatures.${replace(each.value.target_id, "-", "_")}" - } - } - - retry_policy { - maximum_retry_attempts = 3 - maximum_event_age_in_seconds = 3600 - } -} diff --git a/infrastructure/terraform/modules/client-destination/iam_role_api_target_role.tf b/infrastructure/terraform/modules/client-destination/iam_role_api_target_role.tf deleted file mode 100644 index 1158a2b2..00000000 --- a/infrastructure/terraform/modules/client-destination/iam_role_api_target_role.tf +++ /dev/null @@ -1,83 +0,0 @@ -resource "aws_iam_role" "api_target_role" { - name = "${local.csi}-api-target-target-role" - description = "Role for client target rule" - assume_role_policy = data.aws_iam_policy_document.api_target_role_assume_role_policy.json -} - -data "aws_iam_policy_document" 
"api_target_role_assume_role_policy" { - statement { - actions = [ - "sts:AssumeRole" - ] - - principals { - type = "Service" - identifiers = ["events.amazonaws.com"] - } - } -} - -resource "aws_iam_role_policy_attachment" "api_target_role" { - role = aws_iam_role.api_target_role.id - policy_arn = aws_iam_policy.api_target_role.arn -} - -resource "aws_iam_policy" "api_target_role" { - name = "${local.csi}-api-target-role-policy" - description = "IAM Policy for the client target role" - path = "/" - policy = data.aws_iam_policy_document.api_target_role.json -} - -data "aws_iam_policy_document" "api_target_role" { - dynamic "statement" { - for_each = length(aws_cloudwatch_event_api_destination.per_target) > 0 ? [1] : [] - content { - sid = "AllowAPIDestinationAccess" - effect = "Allow" - - actions = [ - "events:InvokeApiDestination", - ] - - resources = [ - for destination in aws_cloudwatch_event_api_destination.per_target : - destination.arn - ] - } - } - - dynamic "statement" { - for_each = length(module.target_dlq) > 0 ? 
[1] : [] - content { - sid = "AllowSQSSendMessageForDLQ" - effect = "Allow" - - actions = [ - "sqs:SendMessage", - ] - - resources = [ - for dlq in module.target_dlq : - dlq.sqs_queue_arn - ] - } - } - - statement { - sid = "AllowKMSForDLQ" - effect = "Allow" - - actions = [ - "kms:ReEncrypt*", - "kms:GenerateDataKey*", - "kms:Encrypt", - "kms:DescribeKey", - "kms:Decrypt" - ] - - resources = [ - var.kms_key_arn, - ] - } -} diff --git a/infrastructure/terraform/modules/client-destination/locals.tf b/infrastructure/terraform/modules/client-destination/locals.tf deleted file mode 100644 index fe672990..00000000 --- a/infrastructure/terraform/modules/client-destination/locals.tf +++ /dev/null @@ -1,12 +0,0 @@ -locals { - csi = replace( - format( - "%s-%s-%s", - var.project, - var.environment, - var.component, - ), - "_", - "", - ) -} diff --git a/infrastructure/terraform/modules/client-destination/module_target_dlq.tf b/infrastructure/terraform/modules/client-destination/module_target_dlq.tf deleted file mode 100644 index 36c4c277..00000000 --- a/infrastructure/terraform/modules/client-destination/module_target_dlq.tf +++ /dev/null @@ -1,41 +0,0 @@ -module "target_dlq" { - source = "https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-sqs.zip" - for_each = var.targets - - aws_account_id = var.aws_account_id - component = var.component - environment = var.environment - project = var.project - region = var.region - name = "${each.key}-dlq" - - sqs_kms_key_arn = var.kms_key_arn - - visibility_timeout_seconds = 60 - - create_dlq = false - - sqs_policy_overload = data.aws_iam_policy_document.target_dlq[each.key].json -} - -data "aws_iam_policy_document" "target_dlq" { - for_each = var.targets - - statement { - sid = "AllowEventBridgeToSendMessage" - effect = "Allow" - - principals { - type = "Service" - identifiers = ["events.amazonaws.com"] - } - - actions = [ - "sqs:SendMessage" - ] - - resources = [ - 
"arn:aws:sqs:${var.region}:${var.aws_account_id}:${var.project}-${var.environment}-${var.component}-${each.key}-dlq-queue" - ] - } -} diff --git a/infrastructure/terraform/modules/client-destination/variables.tf b/infrastructure/terraform/modules/client-destination/variables.tf deleted file mode 100644 index 2b9a0ceb..00000000 --- a/infrastructure/terraform/modules/client-destination/variables.tf +++ /dev/null @@ -1,67 +0,0 @@ -variable "project" { - type = string - description = "The name of the tfscaffold project" -} - -variable "environment" { - type = string - description = "The name of the tfscaffold environment" -} - -variable "component" { - type = string - description = "Component name" -} - -variable "aws_account_id" { - type = string - description = "Account ID" -} - -variable "region" { - type = string - description = "AWS Region" -} - -variable "targets" { - type = map(object({ - client_id = string - target_id = string - invocation_endpoint = string - invocation_rate_limit_per_second = number - http_method = string - header_name = string - header_value = string - })) - - description = "Flattened target definitions keyed by target_id" -} - -variable "subscriptions" { - type = map(object({ - client_id = string - subscription_id = string - target_ids = list(string) - })) - - description = "Flattened subscription definitions keyed by subscription_id" -} - -variable "subscription_targets" { - type = map(object({ - subscription_id = string - target_id = string - })) - - description = "Flattened subscription-target fanout map keyed by subscription-target composite key" -} - -variable "client_bus_name" { - type = string - description = "EventBus name where you create the rule" -} - -variable "kms_key_arn" { - type = string - description = "KMS Key ARN" -} diff --git a/knip.ts b/knip.ts index 3dd626cb..44eb1e8d 100644 --- a/knip.ts +++ b/knip.ts @@ -32,9 +32,16 @@ const config: KnipConfig = { // Resolved transitively through tsconfig.base.json → @tsconfig/node22 
ignoreDependencies: ["@tsconfig/node22"], }, + "lambdas/https-client-lambda": { + ignoreDependencies: ["@tsconfig/node22"], + entry: ["src/__tests__/**/*.ts"], + }, "lambdas/mock-webhook-lambda": { ignoreDependencies: ["@tsconfig/node22"], }, + "src/config-cache": { + ignoreDependencies: ["@tsconfig/node22"], + }, "src/logger": { ignoreDependencies: ["@tsconfig/node22"], }, @@ -42,6 +49,7 @@ const config: KnipConfig = { ignoreDependencies: ["@tsconfig/node22"], }, "tests/integration": { + entry: ["helpers/**/*.ts"], ignoreDependencies: [ "@tsconfig/node22", // Used in helpers/sqs.ts and helpers/cloudwatch.ts; flagged because diff --git a/lambdas/client-transform-filter-lambda/package.json b/lambdas/client-transform-filter-lambda/package.json index 266911da..d9c81d27 100644 --- a/lambdas/client-transform-filter-lambda/package.json +++ b/lambdas/client-transform-filter-lambda/package.json @@ -1,7 +1,7 @@ { "dependencies": { "@aws-sdk/client-s3": "catalog:aws", - "@aws-sdk/client-ssm": "catalog:aws", + "@nhs-notify-client-callbacks/config-cache": "workspace:*", "@nhs-notify-client-callbacks/logger": "workspace:*", "@nhs-notify-client-callbacks/models": "workspace:*", "aws-embedded-metrics": "catalog:app", diff --git a/lambdas/client-transform-filter-lambda/src/__tests__/helpers/client-subscription-fixtures.ts b/lambdas/client-transform-filter-lambda/src/__tests__/helpers/client-subscription-fixtures.ts index 9491292c..7713813e 100644 --- a/lambdas/client-transform-filter-lambda/src/__tests__/helpers/client-subscription-fixtures.ts +++ b/lambdas/client-transform-filter-lambda/src/__tests__/helpers/client-subscription-fixtures.ts @@ -28,6 +28,8 @@ export const createTarget = ( headerValue: "secret", ...overrides.apiKey, }, + mtls: { enabled: false }, + certPinning: { enabled: false }, ...overrides, }); diff --git a/lambdas/client-transform-filter-lambda/src/__tests__/index.component.test.ts b/lambdas/client-transform-filter-lambda/src/__tests__/index.component.test.ts 
index b46c49f8..b234a244 100644 --- a/lambdas/client-transform-filter-lambda/src/__tests__/index.component.test.ts +++ b/lambdas/client-transform-filter-lambda/src/__tests__/index.component.test.ts @@ -15,26 +15,12 @@ jest.mock("@aws-sdk/client-s3", () => { }; }); -const mockSsmSend = jest.fn(); -jest.mock("@aws-sdk/client-ssm", () => { - const actual = jest.requireActual("@aws-sdk/client-ssm"); - return { - ...actual, - SSMClient: jest.fn().mockImplementation(() => ({ - send: mockSsmSend, - })), - }; -}); - -// Set environment variables before importing the handler/module under test so that -// services constructed at module import time (e.g. applicationsMapService) see -// the correct configuration. +// Set environment variables before importing the handler/module under test. process.env.CLIENT_SUBSCRIPTION_CONFIG_BUCKET = "test-bucket"; process.env.CLIENT_SUBSCRIPTION_CONFIG_PREFIX = "client_subscriptions/"; process.env.CLIENT_SUBSCRIPTION_CACHE_TTL_SECONDS = "60"; process.env.METRICS_NAMESPACE = "test-namespace"; process.env.ENVIRONMENT = "test"; -process.env.APPLICATIONS_MAP_PARAMETER = "/test/applications-map"; jest.mock("aws-embedded-metrics", () => ({ createMetricsLogger: jest.fn(() => ({ @@ -50,12 +36,11 @@ jest.mock("aws-embedded-metrics", () => ({ })); import { GetObjectCommand, NoSuchKey } from "@aws-sdk/client-s3"; -import { GetParameterCommand } from "@aws-sdk/client-ssm"; import type { SQSRecord } from "aws-lambda"; import { EventTypes } from "@nhs-notify-client-callbacks/models"; import { createMessageStatusConfig } from "__tests__/helpers/client-subscription-fixtures"; import { createS3Client } from "services/config-loader-service"; -import { applicationsMapService, configLoaderService, handler } from ".."; +import { configLoaderService, handler } from ".."; const makeSqsRecord = (body: object): SQSRecord => ({ messageId: "sqs-id", @@ -104,18 +89,8 @@ const validMessageStatusEvent = (clientId: string, messageStatus: string) => ({ }); 
describe("Lambda handler with S3 subscription filtering", () => { - const applicationsMap = JSON.stringify({ - "client-1": "app-id-1", - "client-a": "app-id-a", - "client-b": "app-id-b", - "client-no-config": "app-id-no-config", - }); - beforeEach(() => { mockSend.mockClear(); - mockSsmSend.mockClear(); - applicationsMapService.reset(); - mockSsmSend.mockResolvedValue({ Parameter: { Value: applicationsMap } }); // Reset loader and clear cache for clean state between tests configLoaderService.reset( createS3Client({ AWS_ENDPOINT_URL: "http://localhost:4566" }), @@ -129,7 +104,6 @@ describe("Lambda handler with S3 subscription filtering", () => { delete process.env.CLIENT_SUBSCRIPTION_CACHE_TTL_SECONDS; delete process.env.METRICS_NAMESPACE; delete process.env.ENVIRONMENT; - delete process.env.APPLICATIONS_MAP_PARAMETER; }); it("passes event through when client config matches subscription", async () => { @@ -148,12 +122,8 @@ describe("Lambda handler with S3 subscription filtering", () => { expect(result).toHaveLength(1); expect(mockSend).toHaveBeenCalledTimes(1); expect(mockSend.mock.calls[0][0]).toBeInstanceOf(GetObjectCommand); - expect(mockSsmSend).toHaveBeenCalledTimes(1); - expect(mockSsmSend.mock.calls[0][0]).toBeInstanceOf(GetParameterCommand); expect(result[0]).toHaveProperty("payload"); expect(result[0]).toHaveProperty("subscriptions"); - expect(result[0]).toHaveProperty("signatures"); - expect(Object.values(result[0].signatures)[0]).toMatch(/^[0-9a-f]+$/); }); it("filters out event when status is not in subscription", async () => { @@ -251,25 +221,4 @@ describe("Lambda handler with S3 subscription filtering", () => { // S3 fetched once per distinct client (client-a and client-b), not once per event expect(mockSend).toHaveBeenCalledTimes(2); }); - - it("filters out event when no applicationId found in SSM map", async () => { - mockSend.mockResolvedValue({ - Body: { - transformToString: jest - .fn() - .mockResolvedValue( - 
JSON.stringify(createValidConfig("client-unknown")), - ), - }, - }); - mockSsmSend.mockResolvedValue({ - Parameter: { Value: JSON.stringify({}) }, - }); - - const result = await handler([ - makeSqsRecord(validMessageStatusEvent("client-unknown", "DELIVERED")), - ]); - - expect(result).toHaveLength(0); - }); }); diff --git a/lambdas/client-transform-filter-lambda/src/__tests__/index.test.ts b/lambdas/client-transform-filter-lambda/src/__tests__/index.test.ts index 14b10096..168d128d 100644 --- a/lambdas/client-transform-filter-lambda/src/__tests__/index.test.ts +++ b/lambdas/client-transform-filter-lambda/src/__tests__/index.test.ts @@ -10,7 +10,6 @@ import type { import type { Logger } from "services/logger"; import type { CallbackMetrics } from "services/metrics"; import type { ConfigLoader } from "services/config-loader"; -import type { ApplicationsMapService } from "services/ssm-applications-map"; import { ObservabilityService } from "services/observability"; import { ConfigLoaderService } from "services/config-loader-service"; import { @@ -71,15 +70,6 @@ const makeStubConfigLoaderService = (): ConfigLoaderService => { return { getLoader: () => loader } as unknown as ConfigLoaderService; }; -const makeStubApplicationsMapService = (): ApplicationsMapService => - ({ - getApplicationId: jest - .fn() - .mockImplementation( - async (clientId: string) => `test-app-id-${clientId}`, - ), - }) as unknown as ApplicationsMapService; - describe("Lambda handler", () => { const mockLogger = { info: jest.fn(), @@ -109,7 +99,6 @@ describe("Lambda handler", () => { createObservabilityService: () => new ObservabilityService(mockLogger, mockMetrics, mockMetricsLogger), createConfigLoaderService: makeStubConfigLoaderService, - createApplicationsMapService: makeStubApplicationsMapService, }); beforeEach(() => { @@ -173,7 +162,6 @@ describe("Lambda handler", () => { expect(result).toHaveLength(1); expect(result[0]).toHaveProperty("payload"); 
expect(result[0]).toHaveProperty("subscriptions"); - expect(result[0]).toHaveProperty("signatures"); const dataItem = result[0].payload.data[0]; expect(dataItem.type).toBe("MessageStatus"); expect((dataItem.attributes as MessageStatusAttributes).messageStatus).toBe( @@ -203,7 +191,6 @@ describe("Lambda handler", () => { new ObservabilityService(mockLogger, mockMetrics, mockMetricsLogger), createConfigLoaderService: () => ({ getLoader: () => customConfigLoader }) as ConfigLoaderService, - createApplicationsMapService: makeStubApplicationsMapService, }); const sqsMessage: SQSRecord = { @@ -234,65 +221,6 @@ describe("Lambda handler", () => { ); }); - it("should throw when any target is missing an apiKey", async () => { - const customConfigLoader = { - loadClientConfig: jest.fn().mockResolvedValue( - createClientSubscriptionConfig("client-abc-123", { - subscriptions: [ - createMessageStatusSubscription(["DELIVERED"], { - targetIds: ["target-no-key", DEFAULT_TARGET_ID], - }), - ], - targets: [ - createTarget({ - targetId: "target-no-key", - apiKey: undefined as unknown as { - headerName: string; - headerValue: string; - }, - }), - createTarget({ - targetId: DEFAULT_TARGET_ID, - apiKey: { - headerName: "x-api-key", - headerValue: "valid-key", - }, - }), - ], - }), - ), - } as unknown as ConfigLoader; - - const handlerWithMixedTargets = createHandler({ - createObservabilityService: () => - new ObservabilityService(mockLogger, mockMetrics, mockMetricsLogger), - createConfigLoaderService: () => - ({ getLoader: () => customConfigLoader }) as ConfigLoaderService, - createApplicationsMapService: makeStubApplicationsMapService, - }); - - const sqsMessage: SQSRecord = { - messageId: "sqs-msg-id-mixed", - receiptHandle: "receipt-handle-mixed", - body: JSON.stringify(validMessageStatusEvent), - attributes: { - ApproximateReceiveCount: "1", - SentTimestamp: "1519211230", - SenderId: "ABCDEFGHIJ", - ApproximateFirstReceiveTimestamp: "1519211230", - }, - messageAttributes: {}, - 
md5OfBody: "mock-md5", - eventSource: "aws:sqs", - eventSourceARN: "arn:aws:sqs:eu-west-2:123456789:mock-queue", - awsRegion: "eu-west-2", - }; - - await expect(handlerWithMixedTargets([sqsMessage])).rejects.toThrow( - "Missing apiKey for target target-no-key", - ); - }); - it("should handle batch of SQS messages from EventBridge Pipes", async () => { const sqsMessages: SQSRecord[] = [ { @@ -414,7 +342,6 @@ describe("Lambda handler", () => { expect(result).toHaveLength(1); expect(result[0]).toHaveProperty("payload"); expect(result[0]).toHaveProperty("subscriptions"); - expect(result[0]).toHaveProperty("signatures"); const dataItem = result[0].payload.data[0]; expect(dataItem.type).toBe("ChannelStatus"); expect((dataItem.attributes as ChannelStatusAttributes).channelStatus).toBe( @@ -481,7 +408,6 @@ describe("Lambda handler", () => { const faultyHandler = createHandler({ createObservabilityService: () => faultyObservability, createConfigLoaderService: makeStubConfigLoaderService, - createApplicationsMapService: makeStubApplicationsMapService, }); const sqsMessage: SQSRecord = { @@ -662,7 +588,6 @@ describe("createHandler default wiring", () => { [], state.mockObservabilityInstance, expect.any(Object), - expect.any(Object), ); expect(result).toEqual(["ok"]); diff --git a/lambdas/client-transform-filter-lambda/src/__tests__/services/config-cache.test.ts b/lambdas/client-transform-filter-lambda/src/__tests__/services/config-cache.test.ts index 6199b92c..e86ef69f 100644 --- a/lambdas/client-transform-filter-lambda/src/__tests__/services/config-cache.test.ts +++ b/lambdas/client-transform-filter-lambda/src/__tests__/services/config-cache.test.ts @@ -3,7 +3,7 @@ import { createClientSubscriptionConfig, createMessageStatusSubscription, } from "__tests__/helpers/client-subscription-fixtures"; -import { ConfigCache } from "services/config-cache"; +import { ConfigCache } from "@nhs-notify-client-callbacks/config-cache"; const createConfig = (): ClientSubscriptionConfiguration 
=> createClientSubscriptionConfig("client-1", { diff --git a/lambdas/client-transform-filter-lambda/src/__tests__/services/config-loader.test.ts b/lambdas/client-transform-filter-lambda/src/__tests__/services/config-loader.test.ts index 495164fb..a94a5e0c 100644 --- a/lambdas/client-transform-filter-lambda/src/__tests__/services/config-loader.test.ts +++ b/lambdas/client-transform-filter-lambda/src/__tests__/services/config-loader.test.ts @@ -1,6 +1,6 @@ import { GetObjectCommand, NoSuchKey, S3Client } from "@aws-sdk/client-s3"; import { createMessageStatusConfig } from "__tests__/helpers/client-subscription-fixtures"; -import { ConfigCache } from "services/config-cache"; +import { ConfigCache } from "@nhs-notify-client-callbacks/config-cache"; import { ConfigLoader } from "services/config-loader"; import { ConfigValidationError } from "services/validators/config-validator"; diff --git a/lambdas/client-transform-filter-lambda/src/__tests__/services/config-update.component.test.ts b/lambdas/client-transform-filter-lambda/src/__tests__/services/config-update.component.test.ts index 81af7f04..c6e0e532 100644 --- a/lambdas/client-transform-filter-lambda/src/__tests__/services/config-update.component.test.ts +++ b/lambdas/client-transform-filter-lambda/src/__tests__/services/config-update.component.test.ts @@ -1,6 +1,6 @@ import { S3Client } from "@aws-sdk/client-s3"; import { createMessageStatusConfig } from "__tests__/helpers/client-subscription-fixtures"; -import { ConfigCache } from "services/config-cache"; +import { ConfigCache } from "@nhs-notify-client-callbacks/config-cache"; import { ConfigLoader } from "services/config-loader"; const makeConfig = (messageStatuses: string[]) => diff --git a/lambdas/client-transform-filter-lambda/src/__tests__/services/payload-signer.test.ts b/lambdas/client-transform-filter-lambda/src/__tests__/services/payload-signer.test.ts deleted file mode 100644 index e1785d55..00000000 --- 
a/lambdas/client-transform-filter-lambda/src/__tests__/services/payload-signer.test.ts +++ /dev/null @@ -1,49 +0,0 @@ -import { createHmac } from "node:crypto"; -import type { ClientCallbackPayload } from "@nhs-notify-client-callbacks/models"; -import { signPayload } from "services/payload-signer"; - -const makePayload = (id = "msg-1") => - ({ data: [{ id }] }) as unknown as ClientCallbackPayload; - -describe("signPayload", () => { - it("produces the expected HMAC-SHA256 hex string", () => { - const payload = makePayload(); - const applicationId = "app-id-1"; - const apiKey = "api-key-1"; - - const expected = createHmac("sha256", `${applicationId}.${apiKey}`) - .update(JSON.stringify(payload)) - .digest("hex"); - - expect(signPayload(payload, applicationId, apiKey)).toBe(expected); - }); - - it("returns a non-empty hex string", () => { - const result = signPayload(makePayload(), "app-id", "api-key"); - expect(result).toMatch(/^[0-9a-f]+$/); - }); - - it("produces different signatures for different payloads", () => { - const apiKey = "key"; - const appId = "app"; - expect(signPayload(makePayload("msg-1"), appId, apiKey)).not.toBe( - signPayload(makePayload("msg-2"), appId, apiKey), - ); - }); - - it("produces different signatures for different applicationIds", () => { - const payload = makePayload(); - const apiKey = "key"; - expect(signPayload(payload, "app-1", apiKey)).not.toBe( - signPayload(payload, "app-2", apiKey), - ); - }); - - it("produces different signatures for different apiKeys", () => { - const payload = makePayload(); - const appId = "app"; - expect(signPayload(payload, appId, "key-1")).not.toBe( - signPayload(payload, appId, "key-2"), - ); - }); -}); diff --git a/lambdas/client-transform-filter-lambda/src/__tests__/services/ssm-applications-map.test.ts b/lambdas/client-transform-filter-lambda/src/__tests__/services/ssm-applications-map.test.ts deleted file mode 100644 index 7123009a..00000000 --- 
a/lambdas/client-transform-filter-lambda/src/__tests__/services/ssm-applications-map.test.ts +++ /dev/null @@ -1,156 +0,0 @@ -import { GetParameterCommand, SSMClient } from "@aws-sdk/client-ssm"; -import { - ApplicationsMapService, - createSsmClient, - resolveCacheTtlMs, -} from "services/ssm-applications-map"; - -jest.mock("services/logger", () => ({ - logger: { - debug: jest.fn(), - info: jest.fn(), - warn: jest.fn(), - error: jest.fn(), - }, -})); - -const makeSsmClient = (value: string | undefined) => - ({ - send: jest - .fn() - .mockResolvedValue( - value === undefined ? {} : { Parameter: { Value: value } }, - ), - }) as unknown as SSMClient; - -describe("ApplicationsMapService", () => { - beforeEach(() => { - jest.useFakeTimers(); - }); - - afterEach(() => { - jest.useRealTimers(); - }); - - it("returns the applicationId for a known clientId", async () => { - const ssmClient = makeSsmClient( - JSON.stringify({ "client-1": "app-id-1", "client-2": "app-id-2" }), - ); - const service = new ApplicationsMapService(ssmClient, "/test/param"); - - expect(await service.getApplicationId("client-1")).toBe("app-id-1"); - }); - - it("returns undefined for an unknown clientId", async () => { - const ssmClient = makeSsmClient(JSON.stringify({ "client-1": "app-id-1" })); - const service = new ApplicationsMapService(ssmClient, "/test/param"); - - expect(await service.getApplicationId("unknown")).toBeUndefined(); - }); - - it("loads from SSM and sends GetParameterCommand with WithDecryption", async () => { - const ssmClient = makeSsmClient(JSON.stringify({ "client-1": "app-id-1" })); - const service = new ApplicationsMapService(ssmClient, "/test/param"); - - await service.getApplicationId("client-1"); - - expect(ssmClient.send).toHaveBeenCalledTimes(1); - expect((ssmClient.send as jest.Mock).mock.calls[0][0]).toBeInstanceOf( - GetParameterCommand, - ); - }); - - it("caches the map and does not call SSM again within TTL", async () => { - const ssmClient = 
makeSsmClient(JSON.stringify({ "client-1": "app-id-1" })); - const service = new ApplicationsMapService(ssmClient, "/test/param", 5000); - - await service.getApplicationId("client-1"); - await service.getApplicationId("client-1"); - - expect(ssmClient.send).toHaveBeenCalledTimes(1); - }); - - it("reloads from SSM after TTL expires", async () => { - const ssmClient = makeSsmClient(JSON.stringify({ "client-1": "app-id-1" })); - const service = new ApplicationsMapService(ssmClient, "/test/param", 5000); - - await service.getApplicationId("client-1"); - jest.advanceTimersByTime(6000); - await service.getApplicationId("client-1"); - - expect(ssmClient.send).toHaveBeenCalledTimes(2); - }); - - it("throws when SSM parameter is missing", async () => { - const ssmClient = makeSsmClient(undefined); - const service = new ApplicationsMapService(ssmClient, "/test/param"); - - await expect(service.getApplicationId("client-1")).rejects.toThrow( - "SSM parameter '/test/param' not found or has no value", - ); - }); - - it("throws when APPLICATIONS_MAP_PARAMETER is not set", async () => { - const ssmClient = makeSsmClient(JSON.stringify({ "client-1": "app-id-1" })); - const service = new ApplicationsMapService(ssmClient, undefined); - - await expect(service.getApplicationId("client-1")).rejects.toThrow( - "APPLICATIONS_MAP_PARAMETER is required", - ); - }); - - it("throws when SSM parameter has empty value", async () => { - const ssmClient = { - send: jest.fn().mockResolvedValue({ Parameter: { Value: "" } }), - } as unknown as SSMClient; - const service = new ApplicationsMapService(ssmClient, "/test/param"); - - await expect(service.getApplicationId("client-1")).rejects.toThrow( - "SSM parameter '/test/param' not found or has no value", - ); - }); - - it("throws when SSM parameter contains invalid JSON", async () => { - const ssmClient = makeSsmClient("not valid json"); - const service = new ApplicationsMapService(ssmClient, "/test/param"); - - await 
expect(service.getApplicationId("client-1")).rejects.toThrow( - "SSM parameter '/test/param' contains invalid JSON", - ); - }); - - it("reset clears the cache and forces reload on next call", async () => { - const ssmClient = makeSsmClient(JSON.stringify({ "client-1": "app-id-1" })); - const service = new ApplicationsMapService(ssmClient, "/test/param", 5000); - - await service.getApplicationId("client-1"); - service.reset(); - await service.getApplicationId("client-1"); - - expect(ssmClient.send).toHaveBeenCalledTimes(2); - }); -}); - -describe("resolveCacheTtlMs", () => { - it("returns configured value in ms", () => { - expect( - resolveCacheTtlMs({ APPLICATIONS_MAP_CACHE_TTL_SECONDS: "30" }), - ).toBe(30_000); - }); - - it("returns default when env var is absent", () => { - expect(resolveCacheTtlMs({})).toBe(60_000); - }); - - it("returns default when env var is not a valid number", () => { - expect( - resolveCacheTtlMs({ APPLICATIONS_MAP_CACHE_TTL_SECONDS: "invalid" }), - ).toBe(60_000); - }); -}); - -describe("createSsmClient", () => { - it("returns an SSMClient instance", () => { - expect(createSsmClient({})).toBeInstanceOf(SSMClient); - }); -}); diff --git a/lambdas/client-transform-filter-lambda/src/handler.ts b/lambdas/client-transform-filter-lambda/src/handler.ts index 0d1f20b6..be05991c 100644 --- a/lambdas/client-transform-filter-lambda/src/handler.ts +++ b/lambdas/client-transform-filter-lambda/src/handler.ts @@ -7,13 +7,11 @@ import type { } from "@nhs-notify-client-callbacks/models"; import { validateStatusPublishEvent } from "services/validators/event-validator"; import { transformEvent } from "services/transformers/event-transformer"; -import { extractCorrelationId, logger } from "services/logger"; +import { extractCorrelationId } from "services/logger"; import { ValidationError, getEventError } from "services/error-handler"; import type { ObservabilityService } from "services/observability"; import type { ConfigLoader } from 
"services/config-loader"; import { evaluateSubscriptionFilters } from "services/subscription-filter"; -import type { ApplicationsMapService } from "services/ssm-applications-map"; -import { signPayload } from "services/payload-signer"; const BATCH_CONCURRENCY = Number(process.env.BATCH_CONCURRENCY) || 10; const MESSAGE_ROOT_URI = process.env.MESSAGE_ROOT_URI ?? ""; @@ -27,20 +25,9 @@ type FilteredEvent = UnsignedEvent & { targetIds: string[]; }; -type SignedEvent = { - transformedEvent: TransformedEvent; - deliveryContext: { - correlationId: string; - eventType: string; - clientId: string; - messageId: string; - }; -}; - export interface TransformedEvent { payload: ClientCallbackPayload; subscriptions: string[]; - signatures: Record; } class BatchStats { @@ -140,79 +127,6 @@ function processSingleEvent( type ClientConfigMap = Map; -async function signBatch( - filteredEvents: FilteredEvent[], - applicationsMapService: ApplicationsMapService, - configByClientId: ClientConfigMap, - stats: BatchStats, - observability: ObservabilityService, -): Promise { - const results = await pMap( - filteredEvents, - async (event): Promise => { - const { clientId } = event.data; - const correlationId = extractCorrelationId(event) ?? event.id; - - const applicationId = - await applicationsMapService.getApplicationId(clientId); - if (!applicationId) { - stats.recordFiltered(); - logger.warn( - "No applicationId found in SSM map - event will not be delivered", - { clientId, correlationId }, - ); - return undefined; - } - - const clientConfig = configByClientId.get(clientId); - const targetsById = new Map( - (clientConfig?.targets ?? 
[]).map((t) => [t.targetId, t]), - ); - - const signaturesByTarget = new Map(); - - for (const targetId of event.targetIds) { - const target = targetsById.get(targetId); - const apiKey = target?.apiKey?.headerValue; - if (!apiKey) { - throw new ValidationError( - `Missing apiKey for target ${targetId}`, - correlationId, - ); - } - const signature = signPayload( - event.transformedPayload, - applicationId, - apiKey, - ); - signaturesByTarget.set(targetId.replaceAll("-", "_"), signature); - observability.recordCallbackSigned( - event.transformedPayload, - correlationId, - clientId, - signature, - ); - } - - return { - transformedEvent: { - payload: event.transformedPayload, - subscriptions: event.subscriptionIds, - signatures: Object.fromEntries(signaturesByTarget), - }, - deliveryContext: { - correlationId, - eventType: event.type, - clientId, - messageId: event.data.messageId, - }, - }; - }, - { concurrency: BATCH_CONCURRENCY }, - ); - return results.filter((e): e is SignedEvent => e !== undefined); -} - async function loadClientConfigs( events: UnsignedEvent[], configLoader: ConfigLoader, @@ -304,7 +218,6 @@ export async function processEvents( event: SQSRecord[], observability: ObservabilityService, configLoader: ConfigLoader, - applicationsMapService: ApplicationsMapService, ): Promise { const startTime = Date.now(); const stats = new BatchStats(); @@ -324,20 +237,21 @@ export async function processEvents( stats, ); - const signedEvents = await signBatch( - filteredEvents, - applicationsMapService, - configByClientId, - stats, - observability, - ); - - for (const signedEvent of signedEvents) { - observability.recordDeliveryInitiated(signedEvent.deliveryContext); - } + const deliverableEvents: TransformedEvent[] = filteredEvents.map( + (filteredEvent) => { + const correlationId = extractCorrelationId(filteredEvent); + observability.recordDeliveryInitiated({ + correlationId, + eventType: filteredEvent.type, + clientId: filteredEvent.data.clientId, + messageId: 
filteredEvent.data.messageId, + }); - const deliverableEvents = signedEvents.map( - (signedEvent) => signedEvent.transformedEvent, + return { + payload: filteredEvent.transformedPayload, + subscriptions: filteredEvent.subscriptionIds, + }; + }, ); const processingTime = Date.now() - startTime; diff --git a/lambdas/client-transform-filter-lambda/src/index.ts b/lambdas/client-transform-filter-lambda/src/index.ts index 9d631bfe..5ef8e197 100644 --- a/lambdas/client-transform-filter-lambda/src/index.ts +++ b/lambdas/client-transform-filter-lambda/src/index.ts @@ -3,17 +3,13 @@ import { Logger } from "services/logger"; import { CallbackMetrics, createMetricLogger } from "services/metrics"; import { ObservabilityService } from "services/observability"; import { ConfigLoaderService } from "services/config-loader-service"; -import { ApplicationsMapService } from "services/ssm-applications-map"; import { type TransformedEvent, processEvents } from "handler"; export const configLoaderService = new ConfigLoaderService(); -export const applicationsMapService = new ApplicationsMapService(); - export interface HandlerDependencies { createObservabilityService?: () => ObservabilityService; createConfigLoaderService?: () => ConfigLoaderService; - createApplicationsMapService?: () => ApplicationsMapService; } function createDefaultObservabilityService(): ObservabilityService { @@ -28,10 +24,6 @@ function createDefaultConfigLoaderService(): ConfigLoaderService { return configLoaderService; } -function createDefaultApplicationsMapService(): ApplicationsMapService { - return applicationsMapService; -} - export function createHandler( dependencies: Partial = {}, ): (event: SQSRecord[]) => Promise { @@ -41,19 +33,10 @@ export function createHandler( const configLoader = ( dependencies.createConfigLoaderService ?? createDefaultConfigLoaderService )(); - const applicationsMap = ( - dependencies.createApplicationsMapService ?? 
- createDefaultApplicationsMapService - )(); return async (event: SQSRecord[]): Promise => { const observability = createObservabilityService(); - return processEvents( - event, - observability, - configLoader.getLoader(), - applicationsMap, - ); + return processEvents(event, observability, configLoader.getLoader()); }; } diff --git a/lambdas/client-transform-filter-lambda/src/services/config-loader-service.ts b/lambdas/client-transform-filter-lambda/src/services/config-loader-service.ts index b0af71b0..43f760c8 100644 --- a/lambdas/client-transform-filter-lambda/src/services/config-loader-service.ts +++ b/lambdas/client-transform-filter-lambda/src/services/config-loader-service.ts @@ -1,5 +1,5 @@ import { S3Client } from "@aws-sdk/client-s3"; -import { ConfigCache } from "services/config-cache"; +import { ConfigCache } from "@nhs-notify-client-callbacks/config-cache"; import { ConfigLoader } from "services/config-loader"; const DEFAULT_CACHE_TTL_SECONDS = 60; diff --git a/lambdas/client-transform-filter-lambda/src/services/config-loader.ts b/lambdas/client-transform-filter-lambda/src/services/config-loader.ts index 2d5b388f..76a5380d 100644 --- a/lambdas/client-transform-filter-lambda/src/services/config-loader.ts +++ b/lambdas/client-transform-filter-lambda/src/services/config-loader.ts @@ -1,6 +1,6 @@ import { GetObjectCommand, NoSuchKey, S3Client } from "@aws-sdk/client-s3"; import type { ClientSubscriptionConfiguration } from "@nhs-notify-client-callbacks/models"; -import { ConfigCache } from "services/config-cache"; +import { ConfigCache } from "@nhs-notify-client-callbacks/config-cache"; import { logger } from "services/logger"; import { wrapUnknownError } from "services/error-handler"; import { diff --git a/lambdas/client-transform-filter-lambda/src/services/observability.ts b/lambdas/client-transform-filter-lambda/src/services/observability.ts index 4cfbf469..efd55eea 100644 --- a/lambdas/client-transform-filter-lambda/src/services/observability.ts +++ 
b/lambdas/client-transform-filter-lambda/src/services/observability.ts @@ -1,9 +1,6 @@ import type { MetricsLogger } from "aws-embedded-metrics"; import type { ClientCallbackPayload } from "@nhs-notify-client-callbacks/models"; -import { - logCallbackGenerated, - logCallbackSigned, -} from "services/callback-logger"; +import { logCallbackGenerated } from "services/callback-logger"; import type { Logger } from "services/logger"; import { logLifecycleEvent } from "services/logger"; import type { CallbackMetrics } from "services/metrics"; @@ -95,15 +92,6 @@ export class ObservabilityService { this.metrics.emitTransformationSuccess(); } - recordCallbackSigned( - payload: ClientCallbackPayload, - correlationId: string | undefined, - clientId: string, - signature: string, - ): void { - logCallbackSigned(this.logger, payload, correlationId, clientId, signature); - } - createChild(context: { correlationId?: string; eventType: string; diff --git a/lambdas/client-transform-filter-lambda/src/services/ssm-applications-map.ts b/lambdas/client-transform-filter-lambda/src/services/ssm-applications-map.ts deleted file mode 100644 index 87cead24..00000000 --- a/lambdas/client-transform-filter-lambda/src/services/ssm-applications-map.ts +++ /dev/null @@ -1,85 +0,0 @@ -import { GetParameterCommand, SSMClient } from "@aws-sdk/client-ssm"; -import { logger } from "services/logger"; - -const DEFAULT_CACHE_TTL_SECONDS = 60; - -export const createSsmClient = ( - env: NodeJS.ProcessEnv = process.env, -): SSMClient => { - const endpoint = env.AWS_ENDPOINT_URL; - return new SSMClient({ endpoint }); -}; - -export const resolveCacheTtlMs = ( - env: NodeJS.ProcessEnv = process.env, -): number => { - const ttlSeconds = Number.parseInt( - env.APPLICATIONS_MAP_CACHE_TTL_SECONDS ?? `${DEFAULT_CACHE_TTL_SECONDS}`, - 10, - ); - return ( - (Number.isFinite(ttlSeconds) ? 
ttlSeconds : DEFAULT_CACHE_TTL_SECONDS) * - 1000 - ); -}; - -export class ApplicationsMapService { - private cachedMap: Map | undefined; - - private cacheExpiresAt = 0; - - constructor( - private readonly ssmClient: SSMClient = createSsmClient(), - private readonly parameterName: string | undefined = process.env - .APPLICATIONS_MAP_PARAMETER, - private readonly cacheTtlMs: number = resolveCacheTtlMs(), - ) {} - - async getApplicationId(clientId: string): Promise { - const map = await this.getMap(); - return map.get(clientId); - } - - private async getMap(): Promise> { - if (!this.parameterName) { - throw new Error("APPLICATIONS_MAP_PARAMETER is required"); - } - const { parameterName } = this; - - if (this.cachedMap && Date.now() < this.cacheExpiresAt) { - logger.debug("Applications map loaded from cache"); - return this.cachedMap; - } - - const response = await this.ssmClient.send( - new GetParameterCommand({ - Name: parameterName, - WithDecryption: true, - }), - ); - - if (!response.Parameter?.Value) { - throw new Error( - `SSM parameter '${parameterName}' not found or has no value`, - ); - } - - let parsed: Record; - try { - parsed = JSON.parse(response.Parameter.Value) as Record; - } catch { - throw new Error(`SSM parameter '${parameterName}' contains invalid JSON`); - } - this.cachedMap = new Map(Object.entries(parsed)); - this.cacheExpiresAt = Date.now() + this.cacheTtlMs; - logger.info("Applications map loaded from SSM", { - parameterName, - }); - return this.cachedMap; - } - - reset(): void { - this.cachedMap = undefined; - this.cacheExpiresAt = 0; - } -} diff --git a/lambdas/https-client-lambda/jest.config.ts b/lambdas/https-client-lambda/jest.config.ts new file mode 100644 index 00000000..cd0ed08e --- /dev/null +++ b/lambdas/https-client-lambda/jest.config.ts @@ -0,0 +1,9 @@ +import { nodeJestConfig } from "../../jest.config.base.ts"; + +export default { + ...nodeJestConfig, + transform: { + ...nodeJestConfig.transform, + "\\.lua$": "/lua-transform.js", + 
}, +}; diff --git a/lambdas/https-client-lambda/lua-transform.js b/lambdas/https-client-lambda/lua-transform.js new file mode 100644 index 00000000..e6e0a1c9 --- /dev/null +++ b/lambdas/https-client-lambda/lua-transform.js @@ -0,0 +1,7 @@ +module.exports = { + process(sourceText) { + return { + code: `module.exports = ${JSON.stringify(sourceText)};`, + }; + }, +}; diff --git a/lambdas/https-client-lambda/package.json b/lambdas/https-client-lambda/package.json new file mode 100644 index 00000000..19763116 --- /dev/null +++ b/lambdas/https-client-lambda/package.json @@ -0,0 +1,39 @@ +{ + "dependencies": { + "@aws-sdk/client-s3": "catalog:aws", + "@aws-sdk/client-secrets-manager": "catalog:aws", + "@aws-sdk/client-sqs": "catalog:aws", + "@aws-sdk/client-ssm": "catalog:aws", + "@nhs-notify-client-callbacks/config-cache": "workspace:*", + "@nhs-notify-client-callbacks/logger": "workspace:*", + "@nhs-notify-client-callbacks/models": "workspace:*", + "@redis/client": "catalog:app", + "aws-embedded-metrics": "catalog:app", + "esbuild": "catalog:tools", + "node-forge": "catalog:app", + "p-map": "catalog:app" + }, + "devDependencies": { + "@tsconfig/node22": "catalog:tools", + "@types/aws-lambda": "catalog:tools", + "@types/jest": "catalog:test", + "@types/node": "catalog:tools", + "@types/node-forge": "catalog:tools", + "eslint": "catalog:lint", + "jest": "catalog:test", + "typescript": "catalog:tools" + }, + "engines": { + "node": ">=24.14.1" + }, + "name": "@nhs-notify-client-callbacks/https-client-lambda", + "private": true, + "scripts": { + "lambda-build": "rm -rf dist && pnpm exec esbuild --bundle --minify --sourcemap --target=es2020 --platform=node --loader:.node=file --loader:.lua=text --entry-names=[name] --outdir=dist src/index.ts", + "lint": "eslint .", + "lint:fix": "eslint . 
--fix", + "test:unit": "jest", + "typecheck": "tsc --noEmit" + }, + "version": "0.0.1" +} diff --git a/lambdas/https-client-lambda/src/__tests__/config-loader.test.ts b/lambdas/https-client-lambda/src/__tests__/config-loader.test.ts new file mode 100644 index 00000000..a8ccdfb4 --- /dev/null +++ b/lambdas/https-client-lambda/src/__tests__/config-loader.test.ts @@ -0,0 +1,149 @@ +import { GetObjectCommand } from "@aws-sdk/client-s3"; + +import { loadTargetConfig, resetCache } from "services/config-loader"; + +const mockS3Send = jest.fn(); +jest.mock("@aws-sdk/client-s3", () => { + const actual = jest.requireActual("@aws-sdk/client-s3"); + return { + ...actual, + S3Client: jest.fn().mockImplementation(() => ({ + send: (...args: unknown[]) => mockS3Send(...args), + })), + }; +}); + +jest.mock("services/logger", () => ({ + logger: { + info: jest.fn(), + warn: jest.fn(), + error: jest.fn(), + debug: jest.fn(), + }, +})); + +process.env.CLIENT_SUBSCRIPTION_CONFIG_BUCKET = "test-bucket"; +process.env.CLIENT_SUBSCRIPTION_CONFIG_PREFIX = "client_subscriptions/"; +process.env.CLIENT_SUBSCRIPTION_CACHE_TTL_SECONDS = "1"; + +const VALID_TARGET = { + targetId: "target-1", + type: "API" as const, + invocationEndpoint: "https://webhook.example.invalid", + invocationMethod: "POST" as const, + invocationRateLimit: 10, + apiKey: { headerName: "x-api-key", headerValue: "secret" }, + mtls: { enabled: false }, + certPinning: { enabled: false }, +}; + +const VALID_CONFIG = { + clientId: "client-1", + subscriptions: [], + targets: [VALID_TARGET], +}; + +const makeS3Response = (body: unknown) => ({ + Body: { + transformToString: jest.fn().mockResolvedValue(JSON.stringify(body)), + }, +}); + +describe("loadTargetConfig", () => { + beforeEach(() => { + mockS3Send.mockReset(); + resetCache(); + }); + + it("parses valid S3 config and returns the matching target", async () => { + mockS3Send.mockResolvedValue(makeS3Response(VALID_CONFIG)); + + const result = await loadTargetConfig("client-1", 
"target-1"); + + expect(result).toEqual(VALID_TARGET); + expect(mockS3Send).toHaveBeenCalledTimes(1); + expect(mockS3Send.mock.calls[0][0]).toBeInstanceOf(GetObjectCommand); + }); + + it("uses CLIENT_SUBSCRIPTION_CONFIG_PREFIX for the S3 key", async () => { + mockS3Send.mockResolvedValue(makeS3Response(VALID_CONFIG)); + + await loadTargetConfig("client-1", "target-1"); + + const command: GetObjectCommand = mockS3Send.mock.calls[0][0]; + expect(command.input.Key).toBe("client_subscriptions/client-1.json"); + }); + + it("rejects config missing required field", async () => { + // eslint-disable-next-line @typescript-eslint/naming-convention, sonarjs/no-unused-vars -- destructuring to exclude mtls + const { mtls: _unusedMtls, ...targetWithoutMtls } = VALID_TARGET; + const invalidConfig = { + ...VALID_CONFIG, + targets: [targetWithoutMtls], + }; + mockS3Send.mockResolvedValue(makeS3Response(invalidConfig)); + + await expect(loadTargetConfig("client-1", "target-1")).rejects.toThrow( + "Invalid client config for 'client-1'", + ); + }); + + it("returns cached value without S3 call on subsequent requests", async () => { + mockS3Send.mockResolvedValue(makeS3Response(VALID_CONFIG)); + + await loadTargetConfig("client-1", "target-1"); + await loadTargetConfig("client-1", "target-1"); + + expect(mockS3Send).toHaveBeenCalledTimes(1); + }); + + it("re-fetches from S3 after TTL expiry", async () => { + jest.useFakeTimers(); + jest.setSystemTime(new Date("2026-01-01T10:00:00Z")); + + mockS3Send.mockResolvedValue(makeS3Response(VALID_CONFIG)); + + await loadTargetConfig("client-1", "target-1"); + + jest.advanceTimersByTime(1001); + + await loadTargetConfig("client-1", "target-1"); + + expect(mockS3Send).toHaveBeenCalledTimes(2); + + jest.useRealTimers(); + }); + + it("throws when CLIENT_SUBSCRIPTION_CONFIG_BUCKET is not set", async () => { + let loadFn: typeof loadTargetConfig; + const saved = process.env.CLIENT_SUBSCRIPTION_CONFIG_BUCKET; + delete 
process.env.CLIENT_SUBSCRIPTION_CONFIG_BUCKET; + + jest.isolateModules(() => { + // eslint-disable-next-line @typescript-eslint/no-require-imports -- jest.isolateModules requires synchronous require + loadFn = require("services/config-loader").loadTargetConfig; + }); + + await expect(loadFn!("client-1", "target-1")).rejects.toThrow( + "CLIENT_SUBSCRIPTION_CONFIG_BUCKET is required", + ); + + process.env.CLIENT_SUBSCRIPTION_CONFIG_BUCKET = saved; + }); + + it("throws when S3 response body is empty", async () => { + mockS3Send.mockResolvedValue({ Body: undefined }); + + await expect(loadTargetConfig("client-1", "target-1")).rejects.toThrow( + "S3 response body was empty for client 'client-1'", + ); + }); + + it("throws when target not found in config", async () => { + mockS3Send.mockResolvedValue(makeS3Response(VALID_CONFIG)); + + await expect(loadTargetConfig("client-1", "nonexistent")).rejects.toThrow( + "Target 'nonexistent' not found in config for client 'client-1'", + ); + }); +}); diff --git a/lambdas/https-client-lambda/src/__tests__/delivery-metrics.test.ts b/lambdas/https-client-lambda/src/__tests__/delivery-metrics.test.ts new file mode 100644 index 00000000..f8142060 --- /dev/null +++ b/lambdas/https-client-lambda/src/__tests__/delivery-metrics.test.ts @@ -0,0 +1,188 @@ +const mockCreateMetricsLogger = jest.fn(); +jest.mock("aws-embedded-metrics", () => ({ + Unit: { Count: "Count" }, + createMetricsLogger: () => mockCreateMetricsLogger(), +})); + +describe("delivery-metrics", () => { + const mockMetrics = { + setNamespace: jest.fn(), + setDimensions: jest.fn(), + setProperty: jest.fn(), + putMetric: jest.fn(), + flush: jest.fn().mockResolvedValue(undefined), + }; + + beforeEach(() => { + jest.resetModules(); + jest.clearAllMocks(); + mockCreateMetricsLogger.mockReturnValue(mockMetrics); + process.env.METRICS_NAMESPACE = "TestNamespace"; + process.env.ENVIRONMENT = "test"; + }); + + afterEach(() => { + delete process.env.METRICS_NAMESPACE; + delete 
process.env.ENVIRONMENT; + }); + + it("throws when METRICS_NAMESPACE is not set", async () => { + delete process.env.METRICS_NAMESPACE; + // @ts-expect-error -- modulePaths resolves at runtime + const { emitDeliveryAttempt } = await import("services/delivery-metrics"); + + expect(() => emitDeliveryAttempt("t-1")).toThrow( + "METRICS_NAMESPACE environment variable is not set", + ); + }); + + it("throws when ENVIRONMENT is not set", async () => { + delete process.env.ENVIRONMENT; + // @ts-expect-error -- modulePaths resolves at runtime + const { emitDeliveryAttempt } = await import("services/delivery-metrics"); + + expect(() => emitDeliveryAttempt("t-1")).toThrow( + "ENVIRONMENT environment variable is not set", + ); + }); + + it("creates metrics logger with correct namespace and dimensions", async () => { + // @ts-expect-error -- modulePaths resolves at runtime + const { emitDeliveryAttempt } = await import("services/delivery-metrics"); + + emitDeliveryAttempt("t-1"); + + expect(mockMetrics.setNamespace).toHaveBeenCalledWith("TestNamespace"); + expect(mockMetrics.setDimensions).toHaveBeenCalledWith({ + Environment: "test", + }); + }); + + it("caches the metrics logger on subsequent calls", async () => { + // @ts-expect-error -- modulePaths resolves at runtime + const mod = await import("services/delivery-metrics"); + const { emitDeliveryAttempt, emitDeliverySuccess } = mod; + + emitDeliveryAttempt("t-1"); + emitDeliverySuccess("t-1"); + + expect(mockCreateMetricsLogger).toHaveBeenCalledTimes(1); + }); + + it("emitDeliveryAttempt emits correct metric", async () => { + // @ts-expect-error -- modulePaths resolves at runtime + const { emitDeliveryAttempt } = await import("services/delivery-metrics"); + + emitDeliveryAttempt("target-42"); + + expect(mockMetrics.setProperty).toHaveBeenCalledWith( + "targetId", + "target-42", + ); + expect(mockMetrics.putMetric).toHaveBeenCalledWith( + "DeliveryAttempt", + 1, + "Count", + ); + }); + + it("emitDeliverySuccess emits correct 
metric", async () => { + // @ts-expect-error -- modulePaths resolves at runtime + const { emitDeliverySuccess } = await import("services/delivery-metrics"); + + emitDeliverySuccess("target-42"); + + expect(mockMetrics.putMetric).toHaveBeenCalledWith( + "DeliverySuccess", + 1, + "Count", + ); + }); + + it("emitDeliveryFailure emits correct metric", async () => { + // @ts-expect-error -- modulePaths resolves at runtime + const { emitDeliveryFailure } = await import("services/delivery-metrics"); + + emitDeliveryFailure("target-42"); + + expect(mockMetrics.putMetric).toHaveBeenCalledWith( + "DeliveryFailure", + 1, + "Count", + ); + }); + + it("emitDeliveryPermanentFailure emits correct metric", async () => { + // @ts-expect-error -- modulePaths resolves at runtime + const mod = await import("services/delivery-metrics"); + const { emitDeliveryPermanentFailure } = mod; + + emitDeliveryPermanentFailure("target-42"); + + expect(mockMetrics.putMetric).toHaveBeenCalledWith( + "DeliveryPermanentFailure", + 1, + "Count", + ); + }); + + it("emitCircuitBreakerOpen emits correct metric", async () => { + // @ts-expect-error -- modulePaths resolves at runtime + const mod = await import("services/delivery-metrics"); + const { emitCircuitBreakerOpen } = mod; + + emitCircuitBreakerOpen("target-42"); + + expect(mockMetrics.putMetric).toHaveBeenCalledWith( + "CircuitBreakerOpen", + 1, + "Count", + ); + }); + + it("emitRateLimited emits correct metric", async () => { + // @ts-expect-error -- modulePaths resolves at runtime + const mod = await import("services/delivery-metrics"); + const { emitRateLimited } = mod; + + emitRateLimited("target-42"); + + expect(mockMetrics.putMetric).toHaveBeenCalledWith( + "DeliveryRateLimited", + 1, + "Count", + ); + }); + + it("flushMetrics calls flush on the instance", async () => { + // @ts-expect-error -- modulePaths resolves at runtime + const mod = await import("services/delivery-metrics"); + const { emitDeliveryAttempt, flushMetrics } = mod; + + 
emitDeliveryAttempt("t-1"); + await flushMetrics(); + + expect(mockMetrics.flush).toHaveBeenCalled(); + }); + + it("flushMetrics does nothing when no metrics instance exists", async () => { + // @ts-expect-error -- modulePaths resolves at runtime + const { flushMetrics } = await import("services/delivery-metrics"); + + await flushMetrics(); + + expect(mockMetrics.flush).not.toHaveBeenCalled(); + }); + + it("resetMetrics clears the cached instance", async () => { + // @ts-expect-error -- modulePaths resolves at runtime + const mod = await import("services/delivery-metrics"); + const { emitDeliveryAttempt, resetMetrics } = mod; + + emitDeliveryAttempt("t-1"); + resetMetrics(); + emitDeliveryAttempt("t-2"); + + expect(mockCreateMetricsLogger).toHaveBeenCalledTimes(2); + }); +}); diff --git a/lambdas/https-client-lambda/src/__tests__/delivery-observability.test.ts b/lambdas/https-client-lambda/src/__tests__/delivery-observability.test.ts new file mode 100644 index 00000000..8f0b943b --- /dev/null +++ b/lambdas/https-client-lambda/src/__tests__/delivery-observability.test.ts @@ -0,0 +1,115 @@ +import { + recordCircuitBreakerOpen, + recordDeliveryAttempt, + recordDeliveryFailure, + recordDeliveryPermanentFailure, + recordDeliveryRateLimited, + recordDeliverySuccess, +} from "services/delivery-observability"; + +jest.mock("services/logger", () => ({ + logger: { + info: jest.fn(), + warn: jest.fn(), + error: jest.fn(), + }, +})); + +jest.mock("services/delivery-metrics", () => ({ + emitDeliveryAttempt: jest.fn(), + emitDeliverySuccess: jest.fn(), + emitDeliveryFailure: jest.fn(), + emitDeliveryPermanentFailure: jest.fn(), + emitCircuitBreakerOpen: jest.fn(), + emitRateLimited: jest.fn(), +})); + +describe("delivery-observability", () => { + it("recordDeliveryAttempt emits metric and logs", () => { + const { emitDeliveryAttempt } = jest.requireMock( + "services/delivery-metrics", + ); + const { logger } = jest.requireMock("services/logger"); + + 
recordDeliveryAttempt("client-1", "target-1"); + + expect(emitDeliveryAttempt).toHaveBeenCalledWith("target-1"); + expect(logger.info).toHaveBeenCalledWith( + "Attempting delivery", + expect.objectContaining({ clientId: "client-1", targetId: "target-1" }), + ); + }); + + it("recordDeliverySuccess emits metric and logs", () => { + const { emitDeliverySuccess } = jest.requireMock( + "services/delivery-metrics", + ); + const { logger } = jest.requireMock("services/logger"); + + recordDeliverySuccess("client-1", "target-1"); + + expect(emitDeliverySuccess).toHaveBeenCalledWith("target-1"); + expect(logger.info).toHaveBeenCalledWith( + "Delivery succeeded", + expect.objectContaining({ clientId: "client-1", targetId: "target-1" }), + ); + }); + + it("recordDeliveryPermanentFailure emits metric and logs warning", () => { + const { emitDeliveryPermanentFailure } = jest.requireMock( + "services/delivery-metrics", + ); + const { logger } = jest.requireMock("services/logger"); + + recordDeliveryPermanentFailure("client-1", "target-1"); + + expect(emitDeliveryPermanentFailure).toHaveBeenCalledWith("target-1"); + expect(logger.warn).toHaveBeenCalledWith( + "Permanent delivery failure — sending to DLQ", + expect.objectContaining({ clientId: "client-1", targetId: "target-1" }), + ); + }); + + it("recordDeliveryRateLimited emits metric and logs", () => { + const { emitRateLimited } = jest.requireMock("services/delivery-metrics"); + const { logger } = jest.requireMock("services/logger"); + + recordDeliveryRateLimited("client-1", "target-1"); + + expect(emitRateLimited).toHaveBeenCalledWith("target-1"); + expect(logger.info).toHaveBeenCalledWith( + "Rate limited (429)", + expect.objectContaining({ clientId: "client-1", targetId: "target-1" }), + ); + }); + + it("recordDeliveryFailure emits metric and logs warning with context", () => { + const { emitDeliveryFailure } = jest.requireMock( + "services/delivery-metrics", + ); + const { logger } = jest.requireMock("services/logger"); + + 
recordDeliveryFailure("client-1", "target-1", 503, 30); + + expect(emitDeliveryFailure).toHaveBeenCalledWith("target-1"); + expect(logger.warn).toHaveBeenCalledWith( + "Transient delivery failure — requeuing", + expect.objectContaining({ + clientId: "client-1", + targetId: "target-1", + statusCode: 503, + backoffSec: 30, + }), + ); + }); + + it("recordCircuitBreakerOpen emits metric", () => { + const { emitCircuitBreakerOpen } = jest.requireMock( + "services/delivery-metrics", + ); + + recordCircuitBreakerOpen("target-1"); + + expect(emitCircuitBreakerOpen).toHaveBeenCalledWith("target-1"); + }); +}); diff --git a/lambdas/https-client-lambda/src/__tests__/dlq-sender.test.ts b/lambdas/https-client-lambda/src/__tests__/dlq-sender.test.ts new file mode 100644 index 00000000..21ae3700 --- /dev/null +++ b/lambdas/https-client-lambda/src/__tests__/dlq-sender.test.ts @@ -0,0 +1,57 @@ +import { SendMessageCommand } from "@aws-sdk/client-sqs"; + +import { sendToDlq } from "services/dlq-sender"; + +const mockSend = jest.fn(); +jest.mock("@aws-sdk/client-sqs", () => { + const actual = jest.requireActual("@aws-sdk/client-sqs"); + return { + ...actual, + SQSClient: jest.fn().mockImplementation(() => ({ + send: (...args: unknown[]) => mockSend(...args), + })), + }; +}); + +process.env.DLQ_URL = "https://sqs.eu-west-2.invalid/123456789/test-dlq"; + +describe("sendToDlq", () => { + beforeEach(() => { + mockSend.mockReset(); + }); + + it("sends SendMessageCommand with correct QueueUrl and MessageBody", async () => { + mockSend.mockResolvedValue({}); + + await sendToDlq('{"test":"message"}'); + + expect(mockSend).toHaveBeenCalledTimes(1); + const command = mockSend.mock.calls[0][0]; + expect(command).toBeInstanceOf(SendMessageCommand); + expect(command.input).toEqual({ + QueueUrl: "https://sqs.eu-west-2.invalid/123456789/test-dlq", + MessageBody: '{"test":"message"}', + }); + }); + + it("surfaces SDK errors", async () => { + mockSend.mockRejectedValue(new Error("SQS send failed")); 
+ + await expect(sendToDlq("body")).rejects.toThrow("SQS send failed"); + }); + + it("throws when DLQ_URL is not set", async () => { + let sendFn: typeof sendToDlq; + const saved = process.env.DLQ_URL; + delete process.env.DLQ_URL; + + jest.isolateModules(() => { + // eslint-disable-next-line @typescript-eslint/no-require-imports -- jest.isolateModules requires synchronous require + sendFn = require("services/dlq-sender").sendToDlq; + }); + + await expect(sendFn!("body")).rejects.toThrow("DLQ_URL is required"); + + process.env.DLQ_URL = saved; + }); +}); diff --git a/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts b/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts new file mode 100644 index 00000000..68280a5e --- /dev/null +++ b/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts @@ -0,0 +1,278 @@ +import { + type EndpointGateConfig, + admit, + getRedisClient, + recordResult, + resetAdmitSha, + resetRedisClient, +} from "services/endpoint-gate"; + +jest.mock("services/logger"); + +const mockSendCommand = jest.fn(); +const mockConnect = jest.fn().mockResolvedValue(undefined); +const mockOn = jest.fn(); + +jest.mock("@redis/client", () => ({ + createClient: jest.fn(() => ({ + sendCommand: mockSendCommand, + connect: mockConnect, + on: mockOn, + isOpen: true, + })), +})); + +const defaultConfig: EndpointGateConfig = { + burstCapacity: 10, + cbProbeIntervalMs: 60_000, + decayPeriodMs: 300_000, + cbWindowPeriodMs: 60_000, + cbErrorThreshold: 0.5, + cbMinAttempts: 10, + cbCooldownMs: 60_000, +}; + +const mockRedis = { + sendCommand: mockSendCommand, + connect: mockConnect, + on: mockOn, + isOpen: true, +} as never; + +beforeEach(() => { + jest.clearAllMocks(); + resetAdmitSha(); +}); + +describe("admit", () => { + it("returns allowed when tokens available", async () => { + mockSendCommand.mockResolvedValueOnce( + JSON.stringify({ allowed: true, probe: false, effectiveRate: 10 }), + ); + + const result = await admit(mockRedis, 
"target-1", 10, true, defaultConfig); + + expect(result).toEqual({ allowed: true, probe: false, effectiveRate: 10 }); + expect(mockSendCommand).toHaveBeenCalledWith( + expect.arrayContaining(["EVALSHA"]), + ); + }); + + it("returns rate_limited when tokens exhausted", async () => { + mockSendCommand.mockResolvedValueOnce( + JSON.stringify({ + allowed: false, + reason: "rate_limited", + retryAfterMs: 500, + effectiveRate: 10, + }), + ); + + const result = await admit(mockRedis, "target-1", 10, false, defaultConfig); + + expect(result).toEqual({ + allowed: false, + reason: "rate_limited", + retryAfterMs: 500, + effectiveRate: 10, + }); + }); + + it("returns circuit_open with probe slot available", async () => { + mockSendCommand.mockResolvedValueOnce( + JSON.stringify({ allowed: true, probe: true, effectiveRate: 0 }), + ); + + const result = await admit(mockRedis, "target-1", 10, true, defaultConfig); + + expect(result).toEqual({ allowed: true, probe: true, effectiveRate: 0 }); + }); + + it("returns circuit_open without probe slot", async () => { + mockSendCommand.mockResolvedValueOnce( + JSON.stringify({ + allowed: false, + reason: "circuit_open", + retryAfterMs: 30_000, + effectiveRate: 0, + }), + ); + + const result = await admit(mockRedis, "target-1", 10, true, defaultConfig); + + expect(result).toEqual({ + allowed: false, + reason: "circuit_open", + retryAfterMs: 30_000, + effectiveRate: 0, + }); + }); + + it("falls back to EVAL on NOSCRIPT error", async () => { + mockSendCommand + .mockRejectedValueOnce(new Error("NOSCRIPT No matching script")) + .mockResolvedValueOnce( + JSON.stringify({ allowed: true, probe: false, effectiveRate: 10 }), + ); + + const result = await admit(mockRedis, "target-1", 10, true, defaultConfig); + + expect(result).toEqual({ allowed: true, probe: false, effectiveRate: 10 }); + expect(mockSendCommand).toHaveBeenCalledTimes(2); + expect(mockSendCommand).toHaveBeenNthCalledWith( + 1, + expect.arrayContaining(["EVALSHA"]), + ); + 
expect(mockSendCommand).toHaveBeenNthCalledWith( + 2, + expect.arrayContaining(["EVAL"]), + ); + }); + + it("propagates non-NOSCRIPT Redis errors", async () => { + mockSendCommand.mockRejectedValueOnce(new Error("Connection refused")); + + await expect( + admit(mockRedis, "target-1", 10, true, defaultConfig), + ).rejects.toThrow("Connection refused"); + }); + + it("passes cbEnabled=0 when circuit breaker is disabled", async () => { + mockSendCommand.mockResolvedValueOnce( + JSON.stringify({ allowed: true, probe: false, effectiveRate: 10 }), + ); + + await admit(mockRedis, "target-1", 10, false, defaultConfig); + + const args = mockSendCommand.mock.calls[0]![0] as string[]; + const cbEnabledArg = args[9]; + expect(cbEnabledArg).toBe("0"); + }); + + it("passes correct keys for target-specific hashes", async () => { + mockSendCommand.mockResolvedValueOnce( + JSON.stringify({ allowed: true, probe: false, effectiveRate: 5 }), + ); + + await admit(mockRedis, "my-target", 5, true, defaultConfig); + + const args = mockSendCommand.mock.calls[0]![0] as string[]; + expect(args[3]).toBe("rl:my-target"); + expect(args[4]).toBe("cb:my-target"); + }); +}); + +describe("recordResult", () => { + it("returns closed on success below threshold", async () => { + mockSendCommand.mockResolvedValueOnce( + JSON.stringify({ ok: true, state: "closed" }), + ); + + const result = await recordResult( + mockRedis, + "target-1", + true, + defaultConfig, + ); + + expect(result).toEqual({ ok: true, state: "closed" }); + expect(mockSendCommand).toHaveBeenCalledWith( + expect.arrayContaining(["EVALSHA"]), + ); + }); + + it("returns opened when failure crosses threshold", async () => { + mockSendCommand.mockResolvedValueOnce( + JSON.stringify({ ok: false, state: "opened" }), + ); + + const result = await recordResult( + mockRedis, + "target-1", + false, + defaultConfig, + ); + + expect(result).toEqual({ ok: false, state: "opened" }); + }); + + it("falls back to EVAL on NOSCRIPT error", async () => { + 
mockSendCommand + .mockRejectedValueOnce(new Error("NOSCRIPT No matching script")) + .mockResolvedValueOnce(JSON.stringify({ ok: true, state: "closed" })); + + const result = await recordResult( + mockRedis, + "target-1", + true, + defaultConfig, + ); + + expect(result).toEqual({ ok: true, state: "closed" }); + expect(mockSendCommand).toHaveBeenCalledTimes(2); + }); + + it("propagates non-NOSCRIPT Redis errors", async () => { + mockSendCommand.mockRejectedValueOnce(new Error("Connection refused")); + + await expect( + recordResult(mockRedis, "target-1", false, defaultConfig), + ).rejects.toThrow("Connection refused"); + }); + + it("passes correct cb key for target", async () => { + mockSendCommand.mockResolvedValueOnce( + JSON.stringify({ ok: true, state: "closed" }), + ); + + await recordResult(mockRedis, "my-target", true, defaultConfig); + + const args = mockSendCommand.mock.calls[0]![0] as string[]; + expect(args[3]).toBe("cb:my-target"); + }); +}); + +describe("getRedisClient", () => { + beforeEach(() => { + resetRedisClient(); + delete process.env.ELASTICACHE_ENDPOINT; + }); + + it("throws when ELASTICACHE_ENDPOINT is not set", async () => { + await expect(getRedisClient()).rejects.toThrow( + "ELASTICACHE_ENDPOINT is required", + ); + }); + + it("creates and connects a Redis client", async () => { + process.env.ELASTICACHE_ENDPOINT = "localhost"; + + const client = await getRedisClient(); + + expect(client).toBeDefined(); + expect(mockConnect).toHaveBeenCalled(); + }); + + it("returns cached client when already open", async () => { + process.env.ELASTICACHE_ENDPOINT = "localhost"; + + const first = await getRedisClient(); + const second = await getRedisClient(); + + expect(first).toBe(second); + expect(mockConnect).toHaveBeenCalledTimes(1); + }); + + it("registers error handler on client", async () => { + process.env.ELASTICACHE_ENDPOINT = "localhost"; + + await getRedisClient(); + + expect(mockOn).toHaveBeenCalledWith("error", expect.any(Function)); + + 
const errorHandler = mockOn.mock.calls.find(
+      (c: unknown[]) => c[0] === "error",
+    )![1] as (err: Error) => void;
+    errorHandler(new Error("test error"));
+  });
+});
diff --git a/lambdas/https-client-lambda/src/__tests__/fixtures/handler-fixtures.ts b/lambdas/https-client-lambda/src/__tests__/fixtures/handler-fixtures.ts
new file mode 100644
index 00000000..cf262c7b
--- /dev/null
+++ b/lambdas/https-client-lambda/src/__tests__/fixtures/handler-fixtures.ts
@@ -0,0 +1,38 @@
+import type { SQSRecord } from "aws-lambda";
+
+export const DEFAULT_TARGET = {
+  targetId: "target-1",
+  type: "API" as const,
+  invocationEndpoint: "https://webhook.example.invalid",
+  invocationMethod: "POST" as const,
+  invocationRateLimit: 10,
+  apiKey: { headerName: "x-api-key", headerValue: "secret-key" },
+  mtls: { enabled: true },
+  certPinning: { enabled: false },
+};
+
+export const makeRecord = (overrides: Partial<SQSRecord> = {}): SQSRecord => ({
+  messageId: "msg-1",
+  receiptHandle: "receipt-1",
+  body: JSON.stringify({
+    payload: {
+      data: [
+        { type: "MessageStatus", attributes: { messageStatus: "delivered" } },
+      ],
+    },
+    subscriptionId: "sub-1",
+    targetId: "target-1",
+  }),
+  attributes: {
+    ApproximateReceiveCount: "1",
+    SentTimestamp: "0",
+    SenderId: "sender",
+    ApproximateFirstReceiveTimestamp: "0",
+  },
+  messageAttributes: {},
+  md5OfBody: "abc",
+  eventSource: "aws:sqs",
+  eventSourceARN: "arn:aws:sqs:eu-west-2:123:queue",
+  awsRegion: "eu-west-2",
+  ...overrides,
+});
diff --git a/lambdas/https-client-lambda/src/__tests__/handler.test.ts b/lambdas/https-client-lambda/src/__tests__/handler.test.ts
new file mode 100644
index 00000000..7c48327d
--- /dev/null
+++ b/lambdas/https-client-lambda/src/__tests__/handler.test.ts
@@ -0,0 +1,443 @@
+import { processRecords } from "handler";
+import {
+  DEFAULT_TARGET,
+  makeRecord,
+} from "__tests__/fixtures/handler-fixtures";
+
+jest.mock("services/logger", () => ({
+  logger: {
+    info: jest.fn(),
+    warn: jest.fn(),
+    error: jest.fn(),
+
debug: jest.fn(), + }, +})); + +const mockLoadTargetConfig = jest.fn(); +jest.mock("services/config-loader", () => ({ + loadTargetConfig: (...args: unknown[]) => mockLoadTargetConfig(...args), +})); + +const mockGetApplicationId = jest.fn(); +jest.mock("services/ssm-applications-map", () => ({ + getApplicationId: (...args: unknown[]) => mockGetApplicationId(...args), +})); + +const mockSignPayload = jest.fn(); +jest.mock("services/payload-signer", () => ({ + signPayload: (...args: unknown[]) => mockSignPayload(...args), +})); + +const mockBuildAgent = jest.fn(); +jest.mock("services/delivery/tls-agent-factory", () => ({ + buildAgent: (...args: unknown[]) => mockBuildAgent(...args), +})); + +const mockDeliverPayload = jest.fn(); +jest.mock("services/delivery/https-client", () => ({ + deliverPayload: (...args: unknown[]) => mockDeliverPayload(...args), +})); + +const mockSendToDlq = jest.fn(); +jest.mock("services/dlq-sender", () => ({ + sendToDlq: (...args: unknown[]) => mockSendToDlq(...args), +})); + +const mockChangeVisibility = jest.fn(); +jest.mock("services/sqs-visibility", () => ({ + changeVisibility: (...args: unknown[]) => mockChangeVisibility(...args), +})); + +const mockJitteredBackoff = jest.fn(); +const mockIsWindowExhausted = jest.fn(); +const mockHandleRateLimitedRecord = jest.fn(); +jest.mock("services/delivery/retry-policy", () => ({ + jitteredBackoffSeconds: (...args: unknown[]) => mockJitteredBackoff(...args), + isWindowExhausted: (...args: unknown[]) => mockIsWindowExhausted(...args), + handleRateLimitedRecord: (...args: unknown[]) => + mockHandleRateLimitedRecord(...args), +})); + +const mockAdmit = jest.fn(); +const mockGetRedisClient = jest.fn(); +const mockRecordResult = jest.fn(); +jest.mock("services/endpoint-gate", () => ({ + admit: (...args: unknown[]) => mockAdmit(...args), + recordResult: (...args: unknown[]) => mockRecordResult(...args), + getRedisClient: (...args: unknown[]) => mockGetRedisClient(...args), +})); + 
+jest.mock("services/delivery-metrics", () => ({ + emitDeliveryAttempt: jest.fn(), + emitDeliverySuccess: jest.fn(), + emitDeliveryFailure: jest.fn(), + emitDeliveryPermanentFailure: jest.fn(), + emitCircuitBreakerOpen: jest.fn(), + emitRateLimited: jest.fn(), + flushMetrics: jest.fn().mockResolvedValue(undefined), +})); + +process.env.CLIENT_ID = "client-1"; + +describe("processRecords", () => { + const mockAgent = {}; + + beforeEach(() => { + jest.clearAllMocks(); + mockLoadTargetConfig.mockResolvedValue(DEFAULT_TARGET); + mockGetApplicationId.mockResolvedValue("app-id-1"); + mockSignPayload.mockReturnValue("signature-abc"); + mockBuildAgent.mockResolvedValue(mockAgent); + mockDeliverPayload.mockResolvedValue({ ok: true }); + mockSendToDlq.mockResolvedValue(undefined); + mockChangeVisibility.mockResolvedValue(undefined); + mockJitteredBackoff.mockReturnValue(5); + mockIsWindowExhausted.mockReturnValue(false); + mockHandleRateLimitedRecord.mockRejectedValue( + new Error("Rate limited — requeue"), + ); + mockGetRedisClient.mockResolvedValue({}); + mockAdmit.mockResolvedValue({ + allowed: true, + probe: false, + effectiveRate: 10, + }); + mockRecordResult.mockResolvedValue({ ok: true, state: "closed" }); + }); + + it("returns no failures on successful delivery", async () => { + const failures = await processRecords([makeRecord()]); + + expect(failures).toEqual([]); + expect(mockLoadTargetConfig).toHaveBeenCalledWith("client-1", "target-1"); + expect(mockGetApplicationId).toHaveBeenCalledWith("client-1"); + expect(mockSignPayload).toHaveBeenCalledWith( + "app-id-1", + "secret-key", + expect.objectContaining({ data: expect.any(Array) }), + ); + expect(mockBuildAgent).toHaveBeenCalledWith(DEFAULT_TARGET); + expect(mockDeliverPayload).toHaveBeenCalledWith( + DEFAULT_TARGET, + expect.any(String), + "signature-abc", + mockAgent, + ); + }); + + it("sends permanent failure to DLQ and returns no failure", async () => { + mockDeliverPayload.mockResolvedValue({ ok: false, 
permanent: true }); + + const failures = await processRecords([makeRecord()]); + + expect(failures).toEqual([]); + expect(mockSendToDlq).toHaveBeenCalledWith(makeRecord().body); + }); + + it("returns failure for transient 5xx errors", async () => { + mockDeliverPayload.mockResolvedValue({ + ok: false, + permanent: false, + statusCode: 503, + }); + + const failures = await processRecords([makeRecord()]); + + expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); + }); + + it("returns failure for 429 rate-limited responses", async () => { + mockDeliverPayload.mockResolvedValue({ + ok: false, + permanent: false, + statusCode: 429, + retryAfterHeader: "60", + }); + + const failures = await processRecords([makeRecord()]); + + expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); + expect(mockHandleRateLimitedRecord).toHaveBeenCalledWith( + makeRecord(), + "client-1", + "target-1", + "60", + 1, + ); + }); + + it("processes multiple records independently", async () => { + const record1 = makeRecord({ messageId: "msg-1" }); + const record2 = makeRecord({ messageId: "msg-2" }); + + mockDeliverPayload + .mockResolvedValueOnce({ ok: true }) + .mockResolvedValueOnce({ + ok: false, + permanent: false, + statusCode: 500, + }); + + const failures = await processRecords([record1, record2]); + + expect(failures).toEqual([{ itemIdentifier: "msg-2" }]); + }); + + it("an unexpected error on one record does not prevent subsequent records being processed", async () => { + const record1 = makeRecord({ messageId: "msg-1" }); + const record2 = makeRecord({ messageId: "msg-2" }); + + mockLoadTargetConfig + .mockRejectedValueOnce(new Error("S3 unavailable")) + .mockResolvedValueOnce(DEFAULT_TARGET); + + const failures = await processRecords([record1, record2]); + + expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); + expect(mockDeliverPayload).toHaveBeenCalledTimes(1); + }); + + it("returns failure when CLIENT_ID is not set", async () => { + const saved = process.env.CLIENT_ID; + 
delete process.env.CLIENT_ID; + + const failures = await processRecords([makeRecord()]); + + expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); + + process.env.CLIENT_ID = saved; + }); + + it("sends to DLQ when retry window is exhausted", async () => { + mockIsWindowExhausted.mockReturnValue(true); + + const failures = await processRecords([makeRecord()]); + + expect(failures).toEqual([]); + expect(mockSendToDlq).toHaveBeenCalledWith(makeRecord().body); + expect(mockDeliverPayload).not.toHaveBeenCalled(); + }); + + it("calls changeVisibility with backoff on 5xx then throws", async () => { + mockDeliverPayload.mockResolvedValue({ + ok: false, + permanent: false, + statusCode: 503, + }); + + const failures = await processRecords([makeRecord()]); + + expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 5); + }); + + it("delegates 429 handling to handleRateLimitedRecord", async () => { + mockDeliverPayload.mockResolvedValue({ + ok: false, + permanent: false, + statusCode: 429, + retryAfterHeader: "120", + }); + + await processRecords([makeRecord()]); + + expect(mockHandleRateLimitedRecord).toHaveBeenCalledWith( + makeRecord(), + "client-1", + "target-1", + "120", + 1, + ); + }); + + it("requeues when rate limited by endpoint gate", async () => { + mockAdmit.mockResolvedValue({ + allowed: false, + reason: "rate_limited", + retryAfterMs: 2000, + effectiveRate: 10, + }); + + const failures = await processRecords([makeRecord()]); + + expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 2); + expect(mockSendToDlq).not.toHaveBeenCalled(); + expect(mockDeliverPayload).not.toHaveBeenCalled(); + }); + + it("requeues when circuit is open", async () => { + mockAdmit.mockResolvedValue({ + allowed: false, + reason: "circuit_open", + retryAfterMs: 30_000, + effectiveRate: 0, + }); + + const failures = await processRecords([makeRecord()]); + 
+ expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 30); + expect(mockSendToDlq).not.toHaveBeenCalled(); + expect(mockDeliverPayload).not.toHaveBeenCalled(); + }); + + it("proceeds to delivery when circuit breaker is disabled", async () => { + const targetNoCb = { + ...DEFAULT_TARGET, + delivery: { circuitBreaker: { enabled: false } }, + }; + mockLoadTargetConfig.mockResolvedValue(targetNoCb); + + const failures = await processRecords([makeRecord()]); + + expect(failures).toEqual([]); + expect(mockAdmit).toHaveBeenCalledWith( + expect.anything(), + "target-1", + 10, + false, + expect.any(Object), + ); + expect(mockDeliverPayload).toHaveBeenCalled(); + }); + + it("calls recordResult(true) on successful delivery when CB enabled", async () => { + const targetCb = { + ...DEFAULT_TARGET, + delivery: { circuitBreaker: { enabled: true } }, + }; + mockLoadTargetConfig.mockResolvedValue(targetCb); + + const failures = await processRecords([makeRecord()]); + + expect(failures).toEqual([]); + expect(mockRecordResult).toHaveBeenCalledWith( + expect.anything(), + "target-1", + true, + expect.any(Object), + ); + }); + + it("calls recordResult(false) on 5xx before visibility change", async () => { + const targetCb = { + ...DEFAULT_TARGET, + delivery: { circuitBreaker: { enabled: true } }, + }; + mockLoadTargetConfig.mockResolvedValue(targetCb); + mockDeliverPayload.mockResolvedValue({ + ok: false, + permanent: false, + statusCode: 503, + }); + + const failures = await processRecords([makeRecord()]); + + expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); + expect(mockRecordResult).toHaveBeenCalledWith( + expect.anything(), + "target-1", + false, + expect.any(Object), + ); + expect(mockChangeVisibility).toHaveBeenCalled(); + }); + + it("does not call recordResult on rate-limited path", async () => { + mockAdmit.mockResolvedValue({ + allowed: false, + reason: "rate_limited", + retryAfterMs: 2000, + 
effectiveRate: 10, + }); + + await processRecords([makeRecord()]); + + expect(mockRecordResult).not.toHaveBeenCalled(); + }); + + it("does not call recordResult on 429 path", async () => { + mockDeliverPayload.mockResolvedValue({ + ok: false, + permanent: false, + statusCode: 429, + retryAfterHeader: "60", + }); + + await processRecords([makeRecord()]); + + expect(mockRecordResult).not.toHaveBeenCalled(); + }); + + it("does not call recordResult when CB is disabled on success", async () => { + const targetNoCb = { + ...DEFAULT_TARGET, + delivery: { circuitBreaker: { enabled: false } }, + }; + mockLoadTargetConfig.mockResolvedValue(targetNoCb); + + await processRecords([makeRecord()]); + + expect(mockRecordResult).not.toHaveBeenCalled(); + }); + + it("emits CircuitBreakerOpen metric when recordResult returns opened", async () => { + const targetCb = { + ...DEFAULT_TARGET, + delivery: { circuitBreaker: { enabled: true } }, + }; + mockLoadTargetConfig.mockResolvedValue(targetCb); + mockDeliverPayload.mockResolvedValue({ + ok: false, + permanent: false, + statusCode: 503, + }); + mockRecordResult.mockResolvedValue({ ok: false, state: "opened" }); + + const { emitCircuitBreakerOpen } = jest.requireMock( + "services/delivery-metrics", + ); + + await processRecords([makeRecord()]); + + expect(emitCircuitBreakerOpen).toHaveBeenCalledWith("target-1"); + }); + + it("does not emit CircuitBreakerOpen when recordResult returns closed", async () => { + const targetCb = { + ...DEFAULT_TARGET, + delivery: { circuitBreaker: { enabled: true } }, + }; + mockLoadTargetConfig.mockResolvedValue(targetCb); + mockDeliverPayload.mockResolvedValue({ + ok: false, + permanent: false, + statusCode: 503, + }); + mockRecordResult.mockResolvedValue({ ok: true, state: "closed" }); + + const { emitCircuitBreakerOpen } = jest.requireMock( + "services/delivery-metrics", + ); + + await processRecords([makeRecord()]); + + expect(emitCircuitBreakerOpen).not.toHaveBeenCalled(); + }); + + it("emits 
RateLimited metric on 429 response", async () => {
+    mockDeliverPayload.mockResolvedValue({
+      ok: false,
+      permanent: false,
+      statusCode: 429,
+      retryAfterHeader: "60",
+    });
+
+    const { emitRateLimited } = jest.requireMock("services/delivery-metrics");
+
+    await processRecords([makeRecord()]);
+
+    expect(emitRateLimited).toHaveBeenCalledWith("target-1");
+  });
+});
diff --git a/lambdas/https-client-lambda/src/__tests__/https-client.test.ts b/lambdas/https-client-lambda/src/__tests__/https-client.test.ts
new file mode 100644
index 00000000..5bfa2864
--- /dev/null
+++ b/lambdas/https-client-lambda/src/__tests__/https-client.test.ts
@@ -0,0 +1,260 @@
+/* eslint-disable unicorn/prefer-event-target -- Node.js http module mock requires EventEmitter API */
+import { EventEmitter } from "node:events";
+import https, { Agent } from "node:https";
+import type { CallbackTarget } from "@nhs-notify-client-callbacks/models";
+
+import { deliverPayload } from "services/delivery/https-client";
+
+jest.mock("services/delivery/tls-agent-factory", () => ({
+  PERMANENT_TLS_ERROR_CODES: new Set([
+    "CERT_HAS_EXPIRED",
+    "DEPTH_ZERO_SELF_SIGNED_CERT",
+    "ERR_CERT_PINNING_FAILED",
+    "ERR_TLS_CERT_ALTNAME_INVALID",
+    "SELF_SIGNED_CERT_IN_CHAIN",
+    "UNABLE_TO_VERIFY_LEAF_SIGNATURE",
+  ]),
+}));
+
+const createTarget = (): CallbackTarget => ({
+  targetId: "target-1",
+  type: "API",
+  invocationEndpoint: "https://webhook.example.invalid:8443/callback",
+  invocationMethod: "POST",
+  invocationRateLimit: 10,
+  apiKey: { headerName: "x-api-key", headerValue: "secret" },
+  mtls: { enabled: false },
+  certPinning: { enabled: false },
+});
+
+const createMockAgent = () => ({}) as Agent;
+
+type MockResponse = EventEmitter & {
+  statusCode: number;
+  headers: Record<string, string>;
+  resume: jest.Mock;
+};
+
+function mockHttpsRequest(
+  statusCode: number,
+  headers: Record<string, string> = {},
+) {
+  const mockReq = new EventEmitter() as EventEmitter & {
+    end: jest.Mock;
+    destroy: jest.Mock;
+  };
+  mockReq.end =
jest.fn();
+  mockReq.destroy = jest.fn();
+
+  jest.spyOn(https, "request").mockImplementation((...args: unknown[]) => {
+    const callback = args.find((a) => typeof a === "function") as
+      | ((res: MockResponse) => void)
+      | undefined;
+
+    const res: MockResponse = Object.assign(new EventEmitter(), {
+      statusCode,
+      headers,
+      resume: jest.fn(),
+    });
+
+    if (callback) {
+      process.nextTick(() => callback(res));
+    }
+
+    return mockReq as unknown as ReturnType<typeof https.request>;
+  });
+
+  return mockReq;
+}
+
+function mockHttpsRequestError(errorCode: string) {
+  const mockReq = new EventEmitter() as EventEmitter & {
+    end: jest.Mock;
+    destroy: jest.Mock;
+  };
+  mockReq.end = jest.fn();
+  mockReq.destroy = jest.fn();
+
+  jest.spyOn(https, "request").mockImplementation(() => {
+    process.nextTick(() => {
+      const error = new Error("TLS error") as NodeJS.ErrnoException;
+      error.code = errorCode;
+      mockReq.emit("error", error);
+    });
+
+    return mockReq as unknown as ReturnType<typeof https.request>;
+  });
+
+  return mockReq;
+}
+
+function mockHttpsRequestTimeout() {
+  const mockReq = new EventEmitter() as EventEmitter & {
+    end: jest.Mock;
+    destroy: jest.Mock;
+  };
+  mockReq.end = jest.fn();
+  mockReq.destroy = jest.fn((error?: Error) => {
+    if (error) {
+      process.nextTick(() => mockReq.emit("error", error));
+    }
+  });
+
+  jest.spyOn(https, "request").mockImplementation(() => {
+    process.nextTick(() => mockReq.emit("timeout"));
+    return mockReq as unknown as ReturnType<typeof https.request>;
+  });
+
+  return mockReq;
+}
+
+describe("deliverPayload", () => {
+  afterEach(() => {
+    jest.restoreAllMocks();
+  });
+
+  it("returns ok: true on 2xx", async () => {
+    mockHttpsRequest(200);
+
+    const result = await deliverPayload(
+      createTarget(),
+      '{"test":true}',
+      "sig-abc",
+      createMockAgent(),
+    );
+
+    expect(result).toEqual({ ok: true });
+  });
+
+  it("returns permanent: true on 4xx non-429", async () => {
+    mockHttpsRequest(400);
+
+    const result = await deliverPayload(
+      createTarget(),
+      '{"test":true}',
+      "sig-abc",
+
createMockAgent(), + ); + + expect(result).toEqual({ ok: false, permanent: true }); + }); + + it("returns permanent: true on TLS error CERT_HAS_EXPIRED", async () => { + mockHttpsRequestError("CERT_HAS_EXPIRED"); + + const result = await deliverPayload( + createTarget(), + '{"test":true}', + "sig-abc", + createMockAgent(), + ); + + expect(result).toEqual({ ok: false, permanent: true }); + }); + + it("returns permanent: true on TLS pinning error", async () => { + mockHttpsRequestError("ERR_CERT_PINNING_FAILED"); + + const result = await deliverPayload( + createTarget(), + '{"test":true}', + "sig-abc", + createMockAgent(), + ); + + expect(result).toEqual({ ok: false, permanent: true }); + }); + + it("returns ok: false, permanent: false on 5xx", async () => { + mockHttpsRequest(503); + + const result = await deliverPayload( + createTarget(), + '{"test":true}', + "sig-abc", + createMockAgent(), + ); + + expect(result).toEqual({ ok: false, permanent: false, statusCode: 503 }); + }); + + it("returns 429 with Retry-After header value", async () => { + mockHttpsRequest(429, { "retry-after": "60" }); + + const result = await deliverPayload( + createTarget(), + '{"test":true}', + "sig-abc", + createMockAgent(), + ); + + expect(result).toEqual({ + ok: false, + permanent: false, + statusCode: 429, + retryAfterHeader: "60", + }); + }); + + it("returns 429 with undefined retryAfterHeader when header is absent", async () => { + mockHttpsRequest(429); + + const result = await deliverPayload( + createTarget(), + '{"test":true}', + "sig-abc", + createMockAgent(), + ); + + expect(result).toEqual({ + ok: false, + permanent: false, + statusCode: 429, + retryAfterHeader: undefined, + }); + }); + + it("returns ok: false, permanent: false on TCP error", async () => { + mockHttpsRequestError("ECONNREFUSED"); + + const result = await deliverPayload( + createTarget(), + '{"test":true}', + "sig-abc", + createMockAgent(), + ); + + expect(result).toEqual({ ok: false, permanent: false, 
statusCode: 0 }); + }); + + it("uses port 443 when URL has no explicit port", async () => { + mockHttpsRequest(200); + const target = createTarget(); + target.invocationEndpoint = "https://webhook.example.invalid/callback"; + + const result = await deliverPayload( + target, + '{"test":true}', + "sig-abc", + createMockAgent(), + ); + + expect(result).toEqual({ ok: true }); + const callUrl = (https.request as jest.Mock).mock.calls[0][0] as URL; + expect(callUrl).toBeInstanceOf(URL); + expect(callUrl.port).toBe(""); + }); + + it("returns transient failure on request timeout", async () => { + mockHttpsRequestTimeout(); + + const result = await deliverPayload( + createTarget(), + '{"test":true}', + "sig-abc", + createMockAgent(), + ); + + expect(result).toEqual({ ok: false, permanent: false, statusCode: 0 }); + }); +}); diff --git a/lambdas/https-client-lambda/src/__tests__/index.test.ts b/lambdas/https-client-lambda/src/__tests__/index.test.ts new file mode 100644 index 00000000..53394149 --- /dev/null +++ b/lambdas/https-client-lambda/src/__tests__/index.test.ts @@ -0,0 +1,36 @@ +import { handler } from "index"; +import { processRecords } from "handler"; + +jest.mock("handler", () => ({ + processRecords: jest.fn().mockResolvedValue([]), +})); + +describe("handler", () => { + it("returns batchItemFailures from processRecords", async () => { + const event = { + Records: [ + { + messageId: "msg-1", + receiptHandle: "r-1", + body: "{}", + attributes: { + ApproximateReceiveCount: "1", + SentTimestamp: "0", + SenderId: "sender", + ApproximateFirstReceiveTimestamp: "0", + }, + messageAttributes: {}, + md5OfBody: "abc", + eventSource: "aws:sqs", + eventSourceARN: "arn:aws:sqs:eu-west-2:123:queue", + awsRegion: "eu-west-2", + }, + ], + }; + + const result = await handler(event); + + expect(result).toEqual({ batchItemFailures: [] }); + expect(processRecords).toHaveBeenCalledWith(event.Records); + }); +}); diff --git 
a/lambdas/https-client-lambda/src/__tests__/payload-signer.test.ts b/lambdas/https-client-lambda/src/__tests__/payload-signer.test.ts
new file mode 100644
index 00000000..191f85dc
--- /dev/null
+++ b/lambdas/https-client-lambda/src/__tests__/payload-signer.test.ts
@@ -0,0 +1,52 @@
+import { createHmac } from "node:crypto";
+import { signPayload } from "services/payload-signer";
+
+const makePayload = () =>
+  ({
+    data: [
+      { type: "MessageStatus", attributes: { messageStatus: "delivered" } },
+    ],
+  }) as Parameters<typeof signPayload>[2];
+
+describe("signPayload", () => {
+  it("produces correct HMAC-SHA256 output for a known input", () => {
+    const payload = makePayload();
+    // eslint-disable-next-line sonarjs/hardcoded-secret-signatures -- test fixture, not a real secret
+    const expected = createHmac("sha256", "app-1.key-1")
+      .update(JSON.stringify(payload))
+      .digest("hex");
+
+    expect(signPayload("app-1", "key-1", payload)).toBe(expected);
+  });
+
+  it("produces different signatures for different appId/apiKey combinations", () => {
+    const payload = makePayload();
+
+    const sig1 = signPayload("app-1", "key-1", payload);
+    const sig2 = signPayload("app-2", "key-2", payload);
+
+    expect(sig1).not.toBe(sig2);
+  });
+
+  it("produces the same signature for the same inputs", () => {
+    const payload = makePayload();
+
+    const sig1 = signPayload("app-1", "key-1", payload);
+    const sig2 = signPayload("app-1", "key-1", payload);
+
+    expect(sig1).toBe(sig2);
+  });
+
+  it("produces a deterministic non-empty signature for an empty payload object", () => {
+    const emptyPayload = {} as Parameters<typeof signPayload>[2];
+
+    const sig = signPayload("app-1", "key-1", emptyPayload);
+
+    expect(sig).toBeTruthy();
+    expect(typeof sig).toBe("string");
+    expect(sig.length).toBeGreaterThan(0);
+
+    const sig2 = signPayload("app-1", "key-1", emptyPayload);
+    expect(sig).toBe(sig2);
+  });
+});
diff --git a/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts 
b/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts new file mode 100644 index 00000000..ed9fbf9a --- /dev/null +++ b/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts @@ -0,0 +1,175 @@ +import type { SQSRecord } from "aws-lambda"; +import { + exceedsSqsMaxVisibility, + handleRateLimitedRecord, + isWindowExhausted, + jitteredBackoffSeconds, + parseRetryAfter, +} from "services/delivery/retry-policy"; + +const mockSendToDlq = jest.fn(); +jest.mock("services/dlq-sender", () => ({ + sendToDlq: (...args: unknown[]) => mockSendToDlq(...args), +})); + +const mockChangeVisibility = jest.fn(); +jest.mock("services/sqs-visibility", () => ({ + changeVisibility: (...args: unknown[]) => mockChangeVisibility(...args), +})); + +jest.mock("services/logger", () => ({ + logger: { info: jest.fn(), warn: jest.fn(), error: jest.fn() }, +})); + +describe("jitteredBackoffSeconds", () => { + it("produces value in [0, 5) at receiveCount=1", () => { + for (let i = 0; i < 100; i++) { + const val = jitteredBackoffSeconds(1); + expect(val).toBeGreaterThanOrEqual(0); + expect(val).toBeLessThan(5); + } + }); + + it("produces value in [0, 300) at receiveCount=10 (cap)", () => { + for (let i = 0; i < 100; i++) { + const val = jitteredBackoffSeconds(10); + expect(val).toBeGreaterThanOrEqual(0); + expect(val).toBeLessThan(300); + } + }); + + it("respects cap at very high receiveCount", () => { + for (let i = 0; i < 50; i++) { + const val = jitteredBackoffSeconds(100); + expect(val).toBeLessThan(300); + } + }); +}); + +describe("parseRetryAfter", () => { + it("parses integer string", () => { + expect(parseRetryAfter("120")).toBe(120); + }); + + it("returns 0 for negative values", () => { + expect(parseRetryAfter("-5")).toBe(0); + }); + + it("parses HTTP date string", () => { + const futureDate = new Date(Date.now() + 60_000); + const result = parseRetryAfter(futureDate.toUTCString()); + expect(result).toBeGreaterThanOrEqual(58); + expect(result).toBeLessThanOrEqual(61); + 
});
+
+  it("returns 0 for past HTTP date", () => {
+    const pastDate = new Date(Date.now() - 60_000);
+    expect(parseRetryAfter(pastDate.toUTCString())).toBe(0);
+  });
+
+  it("returns 0 for garbage input", () => {
+    expect(parseRetryAfter("not-a-date-or-number")).toBe(0);
+  });
+});
+
+describe("isWindowExhausted", () => {
+  it("returns false just below limit", () => {
+    const firstReceived = Date.now() - 999;
+    expect(isWindowExhausted(firstReceived, 1000)).toBe(false);
+  });
+
+  it("returns true at limit", () => {
+    const firstReceived = Date.now() - 1000;
+    expect(isWindowExhausted(firstReceived, 1000)).toBe(true);
+  });
+
+  it("returns true beyond limit", () => {
+    const firstReceived = Date.now() - 2000;
+    expect(isWindowExhausted(firstReceived, 1000)).toBe(true);
+  });
+});
+
+describe("exceedsSqsMaxVisibility", () => {
+  it("returns false at 43200", () => {
+    expect(exceedsSqsMaxVisibility(43_200)).toBe(false);
+  });
+
+  it("returns true at 43201", () => {
+    expect(exceedsSqsMaxVisibility(43_201)).toBe(true);
+  });
+});
+
+const makeRecord = (overrides: Partial<SQSRecord> = {}): SQSRecord => ({
+  messageId: "msg-1",
+  receiptHandle: "receipt-1",
+  body: JSON.stringify({
+    payload: {},
+    subscriptionId: "sub-1",
+    targetId: "target-1",
+  }),
+  attributes: {
+    ApproximateReceiveCount: "1",
+    SentTimestamp: "0",
+    SenderId: "sender",
+    ApproximateFirstReceiveTimestamp: "0",
+  },
+  messageAttributes: {},
+  md5OfBody: "abc",
+  eventSource: "aws:sqs",
+  eventSourceARN: "arn:aws:sqs:eu-west-2:123:queue",
+  awsRegion: "eu-west-2",
+  ...overrides,
+});
+
+describe("handleRateLimitedRecord", () => {
+  beforeEach(() => {
+    jest.clearAllMocks();
+    mockSendToDlq.mockResolvedValue(undefined);
+    mockChangeVisibility.mockResolvedValue(undefined);
+  });
+
+  it("sends to DLQ and returns when Retry-After exceeds SQS max visibility", async () => {
+    await handleRateLimitedRecord(
+      makeRecord(),
+      "client-1",
+      "target-1",
+      "50000",
+      1,
+    );
+
+
expect(mockSendToDlq).toHaveBeenCalledWith(makeRecord().body); + expect(mockChangeVisibility).not.toHaveBeenCalled(); + }); + + it("uses Retry-After value for changeVisibility when within SQS max", async () => { + await expect( + handleRateLimitedRecord(makeRecord(), "client-1", "target-1", "120", 1), + ).rejects.toThrow("Rate limited — requeue"); + + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 120); + expect(mockSendToDlq).not.toHaveBeenCalled(); + }); + + it("uses jittered backoff when no Retry-After header provided", async () => { + await expect( + handleRateLimitedRecord( + makeRecord(), + "client-1", + "target-1", + undefined, + 1, + ), + ).rejects.toThrow("Rate limited — requeue"); + + expect(mockChangeVisibility).toHaveBeenCalled(); + const [, delaySec] = mockChangeVisibility.mock.calls[0] as [string, number]; + expect(delaySec).toBeGreaterThanOrEqual(0); + expect(delaySec).toBeLessThan(5); + }); + + it("throws after requeuing so SQS marks the record as failed", async () => { + await expect( + handleRateLimitedRecord(makeRecord(), "client-1", "target-1", "30", 1), + ).rejects.toThrow("Rate limited — requeue"); + }); +}); diff --git a/lambdas/https-client-lambda/src/__tests__/sqs-visibility.test.ts b/lambdas/https-client-lambda/src/__tests__/sqs-visibility.test.ts new file mode 100644 index 00000000..9e0d9e54 --- /dev/null +++ b/lambdas/https-client-lambda/src/__tests__/sqs-visibility.test.ts @@ -0,0 +1,71 @@ +import { ChangeMessageVisibilityCommand } from "@aws-sdk/client-sqs"; + +import { changeVisibility } from "services/sqs-visibility"; + +const mockSend = jest.fn(); +jest.mock("@aws-sdk/client-sqs", () => { + const actual = jest.requireActual("@aws-sdk/client-sqs"); + return { + ...actual, + SQSClient: jest.fn().mockImplementation(() => ({ + send: (...args: unknown[]) => mockSend(...args), + })), + }; +}); + +process.env.QUEUE_URL = "https://sqs.eu-west-2.invalid/123456789/test-queue"; + +describe("changeVisibility", () => { + 
beforeEach(() => { + mockSend.mockReset(); + }); + + it("sends ChangeMessageVisibilityCommand with correct params", async () => { + mockSend.mockResolvedValue({}); + + await changeVisibility("receipt-handle-1", 30); + + expect(mockSend).toHaveBeenCalledTimes(1); + const command = mockSend.mock.calls[0][0]; + expect(command).toBeInstanceOf(ChangeMessageVisibilityCommand); + expect(command.input).toEqual({ + QueueUrl: "https://sqs.eu-west-2.invalid/123456789/test-queue", + ReceiptHandle: "receipt-handle-1", + VisibilityTimeout: 30, + }); + }); + + it("floors fractional visibility timeout", async () => { + mockSend.mockResolvedValue({}); + + await changeVisibility("receipt-handle-1", 30.7); + + const command = mockSend.mock.calls[0][0]; + expect(command.input.VisibilityTimeout).toBe(30); + }); + + it("surfaces SDK errors", async () => { + mockSend.mockRejectedValue(new Error("SQS error")); + + await expect(changeVisibility("receipt-handle-1", 30)).rejects.toThrow( + "SQS error", + ); + }); + + it("throws when QUEUE_URL is not set", async () => { + let changeFn: typeof changeVisibility; + const saved = process.env.QUEUE_URL; + delete process.env.QUEUE_URL; + + jest.isolateModules(() => { + // eslint-disable-next-line @typescript-eslint/no-require-imports -- jest.isolateModules requires synchronous require + changeFn = require("services/sqs-visibility").changeVisibility; + }); + + await expect(changeFn!("receipt-handle-1", 30)).rejects.toThrow( + "QUEUE_URL is required", + ); + + process.env.QUEUE_URL = saved; + }); +}); diff --git a/lambdas/https-client-lambda/src/__tests__/ssm-applications-map.test.ts b/lambdas/https-client-lambda/src/__tests__/ssm-applications-map.test.ts new file mode 100644 index 00000000..40592928 --- /dev/null +++ b/lambdas/https-client-lambda/src/__tests__/ssm-applications-map.test.ts @@ -0,0 +1,117 @@ +import { GetParameterCommand } from "@aws-sdk/client-ssm"; + +import { getApplicationId, resetCache } from "services/ssm-applications-map"; + 
+const mockSend = jest.fn(); +jest.mock("@aws-sdk/client-ssm", () => { + const actual = jest.requireActual("@aws-sdk/client-ssm"); + return { + ...actual, + SSMClient: jest.fn().mockImplementation(() => ({ + send: (...args: unknown[]) => mockSend(...args), + })), + }; +}); + +jest.mock("services/logger", () => ({ + logger: { + info: jest.fn(), + warn: jest.fn(), + error: jest.fn(), + debug: jest.fn(), + }, +})); + +process.env.APPLICATIONS_MAP_PARAMETER = "/test/applications-map"; + +describe("getApplicationId", () => { + beforeEach(() => { + mockSend.mockReset(); + resetCache(); + }); + + it("returns correct applicationId for a known clientId", async () => { + mockSend.mockResolvedValue({ + Parameter: { + Value: JSON.stringify({ + "client-1": "app-id-1", + "client-2": "app-id-2", + }), + }, + }); + + const result = await getApplicationId("client-1"); + + expect(result).toBe("app-id-1"); + expect(mockSend).toHaveBeenCalledTimes(1); + expect(mockSend.mock.calls[0][0]).toBeInstanceOf(GetParameterCommand); + }); + + it("throws for unknown clientId", async () => { + mockSend.mockResolvedValue({ + Parameter: { + Value: JSON.stringify({ "client-1": "app-id-1" }), + }, + }); + + await expect(getApplicationId("unknown")).rejects.toThrow( + "No applicationId found for clientId 'unknown' in SSM map", + ); + }); + + it("surfaces SSM SDK errors", async () => { + mockSend.mockRejectedValue(new Error("SSM unavailable")); + + await expect(getApplicationId("client-1")).rejects.toThrow( + "SSM unavailable", + ); + }); + + it("throws when APPLICATIONS_MAP_PARAMETER is not set", async () => { + let getFn: typeof getApplicationId; + const saved = process.env.APPLICATIONS_MAP_PARAMETER; + delete process.env.APPLICATIONS_MAP_PARAMETER; + + jest.isolateModules(() => { + // eslint-disable-next-line @typescript-eslint/no-require-imports -- jest.isolateModules requires synchronous require + getFn = require("services/ssm-applications-map").getApplicationId; + }); + + await 
expect(getFn!("client-1")).rejects.toThrow( + "APPLICATIONS_MAP_PARAMETER is required", + ); + + process.env.APPLICATIONS_MAP_PARAMETER = saved; + }); + + it("throws when SSM parameter value is empty", async () => { + mockSend.mockResolvedValue({ Parameter: { Value: undefined } }); + + await expect(getApplicationId("client-1")).rejects.toThrow( + "not found or has no value", + ); + }); + + it("throws when SSM parameter contains invalid JSON", async () => { + mockSend.mockResolvedValue({ + Parameter: { Value: "not-json" }, + }); + + await expect(getApplicationId("client-1")).rejects.toThrow( + "contains invalid JSON", + ); + }); + + it("caches the applications map between calls", async () => { + mockSend.mockResolvedValue({ + Parameter: { + Value: JSON.stringify({ "client-1": "app-id-1" }), + }, + }); + + await getApplicationId("client-1"); + await getApplicationId("client-1"); + + expect(mockSend).toHaveBeenCalledTimes(1); + }); +}); diff --git a/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts b/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts new file mode 100644 index 00000000..6b306234 --- /dev/null +++ b/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts @@ -0,0 +1,364 @@ +import type { CallbackTarget } from "@nhs-notify-client-callbacks/models"; + +const mockS3Send = jest.fn(); +jest.mock("@aws-sdk/client-s3", () => { + const actual = jest.requireActual("@aws-sdk/client-s3"); + return { + ...actual, + S3Client: jest.fn().mockImplementation(() => ({ send: mockS3Send })), + }; +}); + +const mockSecretsManagerSend = jest.fn(); +jest.mock("@aws-sdk/client-secrets-manager", () => { + const actual = jest.requireActual("@aws-sdk/client-secrets-manager"); + return { + ...actual, + SecretsManagerClient: jest + .fn() + .mockImplementation(() => ({ send: mockSecretsManagerSend })), + }; +}); + +jest.mock("services/logger", () => ({ + logger: { + info: jest.fn(), + warn: jest.fn(), + error: jest.fn(), + debug: 
jest.fn(), + }, +})); + +jest.mock("node-forge", () => ({ + pem: { + decode: jest.fn((input: string) => { + const matches = [ + ...(input ?? "").matchAll( + /-----BEGIN ([^-]+)-----[\s\S]*?-----END [^-]+-----/g, + ), + ]; + return matches.map((match) => ({ + type: (match[1] ?? "").trim(), + body: "", + })); + }), + encode: jest.fn( + (obj: { type: string }) => + `-----BEGIN ${obj.type}-----\nZmFrZQ==\n-----END ${obj.type}-----\n`, + ), + }, +})); + +const mockValidTo = new Date(Date.now() + 365 * 86_400_000).toISOString(); + +jest.mock("node:crypto", () => { + const actual = jest.requireActual("node:crypto"); + return { + ...actual, + X509Certificate: class MockX509Certificate { + validTo = mockValidTo; + + publicKey = { + export: () => Buffer.from("mock-spki-der"), + }; + }, + }; +}); + +const TEST_KEY = + "-----BEGIN PRIVATE KEY-----\nfake-key\n-----END PRIVATE KEY-----"; // gitleaks:allow +const TEST_CERT = + "-----BEGIN CERTIFICATE-----\nfake-cert\n-----END CERTIFICATE-----"; +const COMBINED_PEM = `${TEST_KEY}\n${TEST_CERT}`; + +const createTarget = ( + overrides: Partial = {}, +): CallbackTarget => ({ + targetId: "target-1", + type: "API", + invocationEndpoint: "https://webhook.example.invalid", + invocationMethod: "POST", + invocationRateLimit: 10, + apiKey: { headerName: "x-api-key", headerValue: "secret" }, + mtls: { enabled: false }, + certPinning: { enabled: false }, + ...overrides, +}); + +const mockS3PemResponse = (pem: string) => { + mockS3Send.mockResolvedValue({ + Body: { transformToString: jest.fn().mockResolvedValue(pem) }, + }); +}; + +describe("tls-agent-factory", () => { + let buildAgent: typeof import("services/delivery/tls-agent-factory").buildAgent; + let resetCache: typeof import("services/delivery/tls-agent-factory").resetCache; + + beforeEach(async () => { + jest.resetModules(); + + delete process.env.MTLS_CERT_SECRET_ARN; + process.env.MTLS_TEST_CERT_S3_BUCKET = "test-certs-bucket"; + process.env.MTLS_TEST_CERT_S3_KEY = "client.pem"; + 
delete process.env.MTLS_TEST_CA_S3_KEY; + process.env.CERT_EXPIRY_THRESHOLD_MS = "86400000"; + + // @ts-expect-error -- modulePaths resolves at runtime + const mod = await import("services/delivery/tls-agent-factory"); + buildAgent = mod.buildAgent; + resetCache = mod.resetCache; + + mockS3Send.mockReset(); + mockSecretsManagerSend.mockReset(); + }); + + it("builds agent with key and cert when mtls is enabled", async () => { + mockS3PemResponse(COMBINED_PEM); + const agent = await buildAgent(createTarget({ mtls: { enabled: true } })); + + expect(agent).toBeDefined(); + expect(agent.options.keepAlive).toBe(false); + }); + + it("builds agent without key and cert when mtls is disabled", async () => { + const agent = await buildAgent(createTarget({ mtls: { enabled: false } })); + + expect(agent).toBeDefined(); + expect(mockS3Send).not.toHaveBeenCalled(); + expect(mockSecretsManagerSend).not.toHaveBeenCalled(); + }); + + it("loads test CA when MTLS_TEST_CA_S3_KEY is set", async () => { + process.env.MTLS_TEST_CA_S3_KEY = "test-ca.pem"; + jest.resetModules(); + // @ts-expect-error -- modulePaths resolves at runtime + const mod = await import("services/delivery/tls-agent-factory"); + + const caPem = + "-----BEGIN CERTIFICATE-----\ntest-ca\n-----END CERTIFICATE-----"; + mockS3Send + .mockResolvedValueOnce({ + Body: { + transformToString: jest.fn().mockResolvedValue(COMBINED_PEM), + }, + }) + .mockResolvedValueOnce({ + Body: { transformToString: jest.fn().mockResolvedValue(caPem) }, + }); + + const agent = await mod.buildAgent( + createTarget({ mtls: { enabled: true } }), + ); + + expect(agent).toBeDefined(); + expect(mockS3Send).toHaveBeenCalledTimes(2); + }); + + it("loads cert from S3 in non-production", async () => { + mockS3PemResponse(COMBINED_PEM); + await buildAgent(createTarget({ mtls: { enabled: true } })); + + expect(mockS3Send).toHaveBeenCalledTimes(1); + expect(mockSecretsManagerSend).not.toHaveBeenCalled(); + }); + + it("loads cert from SecretsManager in 
production", async () => { + process.env.MTLS_CERT_SECRET_ARN = + "arn:aws:secretsmanager:eu-west-2:123:secret:mtls-cert"; + jest.resetModules(); + // @ts-expect-error -- modulePaths resolves at runtime + const mod = await import("services/delivery/tls-agent-factory"); + + mockSecretsManagerSend.mockResolvedValue({ + SecretString: JSON.stringify({ key: TEST_KEY, cert: TEST_CERT }), + }); + + const agent = await mod.buildAgent( + createTarget({ mtls: { enabled: true } }), + ); + + expect(agent).toBeDefined(); + expect(mockSecretsManagerSend).toHaveBeenCalledTimes(1); + expect(mockS3Send).not.toHaveBeenCalled(); + }); + + it("caches cert material on subsequent calls", async () => { + mockS3PemResponse(COMBINED_PEM); + const target = createTarget({ mtls: { enabled: true } }); + + await buildAgent(target); + await buildAgent(target); + + expect(mockS3Send).toHaveBeenCalledTimes(1); + }); + + it("exports PERMANENT_TLS_ERROR_CODES set", async () => { + // @ts-expect-error -- modulePaths resolves at runtime + const mod = await import("services/delivery/tls-agent-factory"); + + expect(mod.PERMANENT_TLS_ERROR_CODES).toBeInstanceOf(Set); + expect(mod.PERMANENT_TLS_ERROR_CODES.has("CERT_HAS_EXPIRED")).toBe(true); + }); + + it("resets cached material via resetCache", async () => { + mockS3PemResponse(COMBINED_PEM); + const target = createTarget({ mtls: { enabled: true } }); + + await buildAgent(target); + resetCache(); + await buildAgent(target); + + expect(mockS3Send).toHaveBeenCalledTimes(2); + }); + + it("throws when SecretsManager returns empty SecretString", async () => { + process.env.MTLS_CERT_SECRET_ARN = + "arn:aws:secretsmanager:eu-west-2:123:secret:mtls-cert"; + jest.resetModules(); + // @ts-expect-error -- modulePaths resolves at runtime + const mod = await import("services/delivery/tls-agent-factory"); + + mockSecretsManagerSend.mockResolvedValue({ SecretString: undefined }); + + await expect( + mod.buildAgent(createTarget({ mtls: { enabled: true } })), + 
).rejects.toThrow("mTLS cert secret has no value"); + }); + + it("throws when S3 env vars are missing in non-production", async () => { + delete process.env.MTLS_TEST_CERT_S3_BUCKET; + delete process.env.MTLS_TEST_CERT_S3_KEY; + jest.resetModules(); + // @ts-expect-error -- modulePaths resolves at runtime + const mod = await import("services/delivery/tls-agent-factory"); + + await expect( + mod.buildAgent(createTarget({ mtls: { enabled: true } })), + ).rejects.toThrow( + "MTLS_TEST_CERT_S3_BUCKET and MTLS_TEST_CERT_S3_KEY are required", + ); + }); + + it("throws when S3 object body is empty", async () => { + mockS3Send.mockResolvedValue({ Body: undefined }); + + await expect( + buildAgent(createTarget({ mtls: { enabled: true } })), + ).rejects.toThrow("has no body"); + }); + + it("builds agent with checkServerIdentity when certPinning is enabled", async () => { + mockS3PemResponse(COMBINED_PEM); + const target = createTarget({ + mtls: { enabled: true }, + certPinning: { enabled: true, spkiHash: "abc123" }, + }); + + const agent = await buildAgent(target); + + expect(agent).toBeDefined(); + expect(agent.options.checkServerIdentity).toBeDefined(); + }); + + it("checkServerIdentity returns error when SPKI hash does not match", async () => { + mockS3PemResponse(COMBINED_PEM); + const target = createTarget({ + mtls: { enabled: true }, + certPinning: { enabled: true, spkiHash: "expected-hash" }, + }); + + const agent = await buildAgent(target); + const checkFn = agent.options.checkServerIdentity as ( + hostname: string, + cert: { raw: Buffer; subject: { CN: string } }, + ) => Error | undefined; + + const mockPeerCert = { + raw: Buffer.from("mock-cert-der"), + subject: { CN: "webhook.example.invalid" }, + subjectaltname: "DNS:webhook.example.invalid", + }; + + const result = checkFn("webhook.example.invalid", mockPeerCert); + + expect(result).toBeInstanceOf(Error); + expect(result!.message).toContain("Certificate pinning failed"); + expect((result as 
NodeJS.ErrnoException).code).toBe( + "ERR_CERT_PINNING_FAILED", + ); + }); + + it("checkServerIdentity returns undefined when SPKI hash matches", async () => { + const { createHash } = jest.requireActual("node:crypto"); + const expectedHash = createHash("sha256") + .update(Buffer.from("mock-spki-der")) + .digest("base64"); + + mockS3PemResponse(COMBINED_PEM); + const target = createTarget({ + mtls: { enabled: true }, + certPinning: { enabled: true, spkiHash: expectedHash }, + }); + + const agent = await buildAgent(target); + const checkFn = agent.options.checkServerIdentity as ( + hostname: string, + cert: { raw: Buffer; subject: { CN: string } }, + ) => Error | undefined; + + const mockPeerCert = { + raw: Buffer.from("mock-cert-der"), + subject: { CN: "webhook.example.invalid" }, + subjectaltname: "DNS:webhook.example.invalid", + }; + + const result = checkFn("webhook.example.invalid", mockPeerCert); + + expect(result).toBeUndefined(); + }); + + it("checkServerIdentity returns default error when hostname does not match", async () => { + mockS3PemResponse(COMBINED_PEM); + const target = createTarget({ + mtls: { enabled: true }, + certPinning: { enabled: true, spkiHash: "abc" }, + }); + + const agent = await buildAgent(target); + const checkFn = agent.options.checkServerIdentity as ( + hostname: string, + cert: { raw: Buffer; subject: { CN: string } }, + ) => Error | undefined; + + const mockPeerCert = { + raw: Buffer.from("mock-cert-der"), + subject: { CN: "other.example.invalid" }, + subjectaltname: "DNS:other.example.invalid", + }; + + const result = checkFn("webhook.example.invalid", mockPeerCert); + + expect(result).toBeDefined(); + expect(result!.message).toContain("does not match"); + }); + + it("does not load cert material when mtls is disabled", async () => { + const agent = await buildAgent(createTarget({ mtls: { enabled: false } })); + + expect(agent).toBeDefined(); + expect(mockS3Send).not.toHaveBeenCalled(); + 
expect(mockSecretsManagerSend).not.toHaveBeenCalled(); + }); + + it("throws when certPinning.enabled is true but spkiHash is missing", async () => { + const target = createTarget({ + certPinning: { enabled: true }, + }); + + await expect(buildAgent(target)).rejects.toThrow( + "certPinning.spkiHash is required when certPinning is enabled", + ); + expect(mockS3Send).not.toHaveBeenCalled(); + }); +}); diff --git a/lambdas/https-client-lambda/src/handler.ts b/lambdas/https-client-lambda/src/handler.ts new file mode 100644 index 00000000..95c28a89 --- /dev/null +++ b/lambdas/https-client-lambda/src/handler.ts @@ -0,0 +1,222 @@ +import type { SQSBatchItemFailure, SQSRecord } from "aws-lambda"; +import type { ClientCallbackPayload } from "@nhs-notify-client-callbacks/models"; +import pMap from "p-map"; +import { logger } from "services/logger"; +import { loadTargetConfig } from "services/config-loader"; +import { getApplicationId } from "services/ssm-applications-map"; +import { signPayload } from "services/payload-signer"; +import { buildAgent } from "services/delivery/tls-agent-factory"; +import { deliverPayload } from "services/delivery/https-client"; +import type { DeliveryResult } from "services/delivery/https-client"; +import { sendToDlq } from "services/dlq-sender"; +import { changeVisibility } from "services/sqs-visibility"; +import { + handleRateLimitedRecord, + isWindowExhausted, + jitteredBackoffSeconds, +} from "services/delivery/retry-policy"; +import { + type EndpointGateConfig, + admit, + getRedisClient, + recordResult, +} from "services/endpoint-gate"; +import { + recordCircuitBreakerOpen, + recordDeliveryAttempt, + recordDeliveryFailure, + recordDeliveryPermanentFailure, + recordDeliveryRateLimited, + recordDeliverySuccess, +} from "services/delivery-observability"; +import { flushMetrics } from "services/delivery-metrics"; + +type RedisClientType = Awaited>; + +const DEFAULT_MAX_RETRY_DURATION_MS = 3_600_000; +const DEFAULT_CONCURRENCY_LIMIT = 5; + 
+const gateConfig: EndpointGateConfig = { + burstCapacity: Number(process.env.TOKEN_BUCKET_BURST_CAPACITY ?? "10"), + cbProbeIntervalMs: Number(process.env.CB_PROBE_INTERVAL_MS ?? "60000"), + decayPeriodMs: Number(process.env.CB_DECAY_PERIOD_MS ?? "300000"), + cbWindowPeriodMs: Number(process.env.CB_WINDOW_PERIOD_MS ?? "60000"), + cbErrorThreshold: Number(process.env.CB_ERROR_THRESHOLD ?? "0.5"), + cbMinAttempts: Number(process.env.CB_MIN_ATTEMPTS ?? "10"), + cbCooldownMs: Number(process.env.CB_COOLDOWN_MS ?? "60000"), +}; + +type CallbackDeliveryMessage = { + payload: ClientCallbackPayload; + subscriptionId: string; + targetId: string; +}; + +async function checkAdmission( + redis: RedisClientType, + targetId: string, + invocationRateLimit: number, + cbEnabled: boolean, + clientId: string, + record: SQSRecord, +): Promise { + const gateResult = await admit( + redis, + targetId, + invocationRateLimit, + cbEnabled, + gateConfig, + ); + + if (!gateResult.allowed) { + const delaySec = Math.ceil(gateResult.retryAfterMs / 1000); + logger.warn(`Admission denied: ${gateResult.reason} — requeuing`, { + clientId, + targetId, + reason: gateResult.reason, + delaySec, + }); + await changeVisibility(record.receiptHandle, delaySec); + throw new Error(`Admission denied: ${gateResult.reason}`); + } +} + +async function handleDeliveryResult( + result: DeliveryResult, + record: SQSRecord, + redis: RedisClientType, + clientId: string, + targetId: string, + cbEnabled: boolean, +): Promise { + if (result.ok) { + if (cbEnabled) { + await recordResult(redis, targetId, true, gateConfig); + } + recordDeliverySuccess(clientId, targetId); + return; + } + + if (result.permanent) { + recordDeliveryPermanentFailure(clientId, targetId); + await sendToDlq(record.body); + return; + } + + const receiveCount = Number(record.attributes.ApproximateReceiveCount); + + if ("retryAfterHeader" in result) { + recordDeliveryRateLimited(clientId, targetId); + await handleRateLimitedRecord( + record, + 
clientId, + targetId, + result.retryAfterHeader, + receiveCount, + ); + return; + } + + const backoffSec = jitteredBackoffSeconds(receiveCount); + if (cbEnabled) { + const cbOutcome = await recordResult(redis, targetId, false, gateConfig); + if (!cbOutcome.ok) { + recordCircuitBreakerOpen(targetId); + } + } + recordDeliveryFailure(clientId, targetId, result.statusCode, backoffSec); + await changeVisibility(record.receiptHandle, backoffSec); + throw new Error(`Transient failure: ${result.statusCode}`); +} + +async function processRecord( + record: SQSRecord, + redis: RedisClientType, +): Promise { + const { CLIENT_ID } = process.env; + if (!CLIENT_ID) { + throw new Error("CLIENT_ID is required"); + } + + const message: CallbackDeliveryMessage = JSON.parse(record.body); + const { payload, targetId } = message; + + logger.info("Processing delivery", { clientId: CLIENT_ID, targetId }); + + const target = await loadTargetConfig(CLIENT_ID, targetId); + const maxRetryDurationMs = + target.delivery?.maxRetryDurationSeconds === undefined + ? DEFAULT_MAX_RETRY_DURATION_MS + : target.delivery.maxRetryDurationSeconds * 1000; + + const firstReceivedMs = Number( + record.attributes.ApproximateFirstReceiveTimestamp, + ); + + if (isWindowExhausted(firstReceivedMs, maxRetryDurationMs)) { + logger.warn("Retry window exhausted — sending to DLQ", { + clientId: CLIENT_ID, + targetId, + }); + await sendToDlq(record.body); + return; + } + + const applicationId = await getApplicationId(CLIENT_ID); + const cbEnabled = target.delivery?.circuitBreaker?.enabled ?? 
false; + + await checkAdmission( + redis, + targetId, + target.invocationRateLimit, + cbEnabled, + CLIENT_ID, + record, + ); + + const agent = await buildAgent(target); + const signature = signPayload( + applicationId, + target.apiKey.headerValue, + payload, + ); + const payloadJson = JSON.stringify(payload); + + recordDeliveryAttempt(CLIENT_ID, targetId); + const result = await deliverPayload(target, payloadJson, signature, agent); + + await handleDeliveryResult( + result, + record, + redis, + CLIENT_ID, + targetId, + cbEnabled, + ); +} + +export async function processRecords( + records: SQSRecord[], +): Promise { + const concurrencyLimit = Number( + process.env.CONCURRENCY_LIMIT ?? String(DEFAULT_CONCURRENCY_LIMIT), + ); + + const redis = await getRedisClient(); + + const results = await pMap( + records, + async (record): Promise => { + try { + await processRecord(record, redis); + return null; + } catch { + return { itemIdentifier: record.messageId }; + } + }, + { concurrency: concurrencyLimit }, + ); + + await flushMetrics(); + return results.filter((r): r is SQSBatchItemFailure => r !== null); +} diff --git a/lambdas/https-client-lambda/src/index.ts b/lambdas/https-client-lambda/src/index.ts new file mode 100644 index 00000000..d53608ff --- /dev/null +++ b/lambdas/https-client-lambda/src/index.ts @@ -0,0 +1,7 @@ +import type { SQSBatchResponse, SQSEvent } from "aws-lambda"; +import { processRecords } from "handler"; + +export async function handler(event: SQSEvent): Promise { + const batchItemFailures = await processRecords(event.Records); + return { batchItemFailures }; +} diff --git a/lambdas/https-client-lambda/src/lua.d.ts b/lambdas/https-client-lambda/src/lua.d.ts new file mode 100644 index 00000000..8fe49f84 --- /dev/null +++ b/lambdas/https-client-lambda/src/lua.d.ts @@ -0,0 +1,4 @@ +declare module "*.lua" { + const content: string; + export default content; +} diff --git a/lambdas/https-client-lambda/src/services/admit.lua 
b/lambdas/https-client-lambda/src/services/admit.lua new file mode 100644 index 00000000..8fdf2b15 --- /dev/null +++ b/lambdas/https-client-lambda/src/services/admit.lua @@ -0,0 +1,95 @@ +-- admit.lua +-- Atomic token-bucket rate limiter + circuit-breaker admission check. +-- KEYS[1] = rl:{targetId} (rate limiter hash) +-- KEYS[2] = cb:{targetId} (circuit breaker hash) +-- ARGV[1] = now (epoch ms) +-- ARGV[2] = refillPerSec (tokens/sec from target config) +-- ARGV[3] = capacity (burst capacity) +-- ARGV[4] = cbProbeIntervalMs +-- ARGV[5] = cbEnabled ("1" or "0") +-- ARGV[6] = decayPeriodMs + +local rl_key = KEYS[1] +local cb_key = KEYS[2] +local now = tonumber(ARGV[1]) +local refillPerSec = tonumber(ARGV[2]) +local capacity = tonumber(ARGV[3]) +local cbProbeIntervalMs = tonumber(ARGV[4]) +local cbEnabled = ARGV[5] == "1" +local decayPeriodMs = tonumber(ARGV[6]) + +-- Load circuit breaker state +local opened_until_ms = tonumber(redis.call("HGET", cb_key, "opened_until_ms") or "0") or 0 +local last_probe_ms = tonumber(redis.call("HGET", cb_key, "last_probe_ms") or "0") or 0 + +-- Circuit breaker evaluation (only when enabled) +if cbEnabled and opened_until_ms > 0 and now < opened_until_ms then + -- Circuit is open — check for probe slot + if cbProbeIntervalMs > 0 and (now - last_probe_ms) >= cbProbeIntervalMs then + redis.call("HSET", cb_key, "last_probe_ms", tostring(now)) + return cjson.encode({ + allowed = true, + probe = true, + effectiveRate = 0, + }) + end + -- No probe slot available + local retryAfterMs = opened_until_ms - now + return cjson.encode({ + allowed = false, + reason = "circuit_open", + retryAfterMs = retryAfterMs, + effectiveRate = 0, + }) +end + +-- Compute effective rate (with decay scaling if applicable) +local effectiveRate = refillPerSec + +if cbEnabled and opened_until_ms > 0 and now >= opened_until_ms and decayPeriodMs > 0 then + local elapsed_since_close = now - opened_until_ms + if elapsed_since_close < decayPeriodMs then + effectiveRate 
= refillPerSec * (elapsed_since_close / decayPeriodMs) + if effectiveRate < 0.001 then + effectiveRate = 0.001 + end + end +end + +-- Load rate limiter state +local tokens = tonumber(redis.call("HGET", rl_key, "tokens") or "") or capacity +local last_refill_ms = tonumber(redis.call("HGET", rl_key, "last_refill_ms") or "") or now + +-- Refill tokens +local elapsed_ms = now - last_refill_ms +if elapsed_ms > 0 then + tokens = math.min(capacity, tokens + elapsed_ms * effectiveRate / 1000) +end + +-- Check rate limit +if tokens < 1 then + -- Compute retry-after based on effective rate + local retryAfterMs = 0 + if effectiveRate > 0 then + retryAfterMs = math.ceil((1 - tokens) / effectiveRate * 1000) + else + retryAfterMs = 1000 + end + return cjson.encode({ + allowed = false, + reason = "rate_limited", + retryAfterMs = retryAfterMs, + effectiveRate = effectiveRate, + }) +end + +-- Deduct token and update state +tokens = tokens - 1 +redis.call("HSET", rl_key, "tokens", tostring(tokens)) +redis.call("HSET", rl_key, "last_refill_ms", tostring(now)) + +return cjson.encode({ + allowed = true, + probe = false, + effectiveRate = effectiveRate, +}) diff --git a/lambdas/https-client-lambda/src/services/config-loader.ts b/lambdas/https-client-lambda/src/services/config-loader.ts new file mode 100644 index 00000000..c8f9e714 --- /dev/null +++ b/lambdas/https-client-lambda/src/services/config-loader.ts @@ -0,0 +1,77 @@ +import { GetObjectCommand, S3Client } from "@aws-sdk/client-s3"; +import { + type CallbackTarget, + parseClientSubscriptionConfiguration, +} from "@nhs-notify-client-callbacks/models"; +import { ConfigCache } from "@nhs-notify-client-callbacks/config-cache"; +import { logger } from "services/logger"; + +const s3Client = new S3Client({}); +let cache: ConfigCache | undefined; + +function getCache(): ConfigCache { + if (!cache) { + const ttlSeconds = + Number(process.env.CLIENT_SUBSCRIPTION_CACHE_TTL_SECONDS) || 300; + cache = new ConfigCache(ttlSeconds * 1000); + } + 
return cache; +} + +export function resetCache(): void { + cache = undefined; +} + +export async function loadTargetConfig( + clientId: string, + targetId: string, +): Promise { + let clientConfig = getCache().get(clientId); + + if (!clientConfig) { + const { + CLIENT_SUBSCRIPTION_CONFIG_BUCKET, + CLIENT_SUBSCRIPTION_CONFIG_PREFIX, + } = process.env; + if (!CLIENT_SUBSCRIPTION_CONFIG_BUCKET) { + throw new Error("CLIENT_SUBSCRIPTION_CONFIG_BUCKET is required"); + } + + const prefix = CLIENT_SUBSCRIPTION_CONFIG_PREFIX ?? "client_subscriptions/"; + + const response = await s3Client.send( + new GetObjectCommand({ + Bucket: CLIENT_SUBSCRIPTION_CONFIG_BUCKET, + Key: `${prefix}${clientId}.json`, + }), + ); + + if (!response.Body) { + throw new Error(`S3 response body was empty for client '${clientId}'`); + } + + const raw = await response.Body.transformToString(); + const parsed = JSON.parse(raw) as unknown; + const result = parseClientSubscriptionConfiguration(parsed); + + if (!result.success) { + throw new Error( + `Invalid client config for '${clientId}': ${result.error.message}`, + ); + } + + clientConfig = result.data; + getCache().set(clientId, clientConfig); + logger.info("Client config loaded from S3", { clientId }); + } + + const target = clientConfig.targets.find((t) => t.targetId === targetId); + + if (!target) { + throw new Error( + `Target '${targetId}' not found in config for client '${clientId}'`, + ); + } + + return target; +} diff --git a/lambdas/https-client-lambda/src/services/delivery-metrics.ts b/lambdas/https-client-lambda/src/services/delivery-metrics.ts new file mode 100644 index 00000000..f9a6bc49 --- /dev/null +++ b/lambdas/https-client-lambda/src/services/delivery-metrics.ts @@ -0,0 +1,72 @@ +import { Unit, createMetricsLogger } from "aws-embedded-metrics"; +import type { MetricsLogger } from "aws-embedded-metrics"; + +let metricsInstance: MetricsLogger | undefined; + +function getMetrics(): MetricsLogger { + if (metricsInstance) { + return 
metricsInstance; + } + + const namespace = process.env.METRICS_NAMESPACE; + const environment = process.env.ENVIRONMENT; + + if (!namespace) { + throw new Error("METRICS_NAMESPACE environment variable is not set"); + } + if (!environment) { + throw new Error("ENVIRONMENT environment variable is not set"); + } + + metricsInstance = createMetricsLogger(); + metricsInstance.setNamespace(namespace); + metricsInstance.setDimensions({ Environment: environment }); + + return metricsInstance; +} + +export function emitDeliveryAttempt(targetId: string): void { + const metrics = getMetrics(); + metrics.setProperty("targetId", targetId); + metrics.putMetric("DeliveryAttempt", 1, Unit.Count); +} + +export function emitDeliverySuccess(targetId: string): void { + const metrics = getMetrics(); + metrics.setProperty("targetId", targetId); + metrics.putMetric("DeliverySuccess", 1, Unit.Count); +} + +export function emitDeliveryFailure(targetId: string): void { + const metrics = getMetrics(); + metrics.setProperty("targetId", targetId); + metrics.putMetric("DeliveryFailure", 1, Unit.Count); +} + +export function emitDeliveryPermanentFailure(targetId: string): void { + const metrics = getMetrics(); + metrics.setProperty("targetId", targetId); + metrics.putMetric("DeliveryPermanentFailure", 1, Unit.Count); +} + +export function emitRateLimited(targetId: string): void { + const metrics = getMetrics(); + metrics.setProperty("targetId", targetId); + metrics.putMetric("DeliveryRateLimited", 1, Unit.Count); +} + +export function emitCircuitBreakerOpen(targetId: string): void { + const metrics = getMetrics(); + metrics.setProperty("targetId", targetId); + metrics.putMetric("CircuitBreakerOpen", 1, Unit.Count); +} + +export async function flushMetrics(): Promise { + if (metricsInstance) { + await metricsInstance.flush(); + } +} + +export function resetMetrics(): void { + metricsInstance = undefined; +} diff --git a/lambdas/https-client-lambda/src/services/delivery-observability.ts 
b/lambdas/https-client-lambda/src/services/delivery-observability.ts new file mode 100644 index 00000000..c2861e28 --- /dev/null +++ b/lambdas/https-client-lambda/src/services/delivery-observability.ts @@ -0,0 +1,63 @@ +import { logger } from "services/logger"; +import { + emitCircuitBreakerOpen, + emitDeliveryAttempt, + emitDeliveryFailure, + emitDeliveryPermanentFailure, + emitDeliverySuccess, + emitRateLimited, +} from "services/delivery-metrics"; + +export function recordDeliveryAttempt( + clientId: string, + targetId: string, +): void { + emitDeliveryAttempt(targetId); + logger.info("Attempting delivery", { clientId, targetId }); +} + +export function recordDeliverySuccess( + clientId: string, + targetId: string, +): void { + emitDeliverySuccess(targetId); + logger.info("Delivery succeeded", { clientId, targetId }); +} + +export function recordDeliveryPermanentFailure( + clientId: string, + targetId: string, +): void { + emitDeliveryPermanentFailure(targetId); + logger.warn("Permanent delivery failure — sending to DLQ", { + clientId, + targetId, + }); +} + +export function recordDeliveryRateLimited( + clientId: string, + targetId: string, +): void { + emitRateLimited(targetId); + logger.info("Rate limited (429)", { clientId, targetId }); +} + +export function recordDeliveryFailure( + clientId: string, + targetId: string, + statusCode: number, + backoffSec: number, +): void { + emitDeliveryFailure(targetId); + logger.warn("Transient delivery failure — requeuing", { + clientId, + targetId, + statusCode, + backoffSec, + }); +} + +export function recordCircuitBreakerOpen(targetId: string): void { + emitCircuitBreakerOpen(targetId); +} diff --git a/lambdas/https-client-lambda/src/services/delivery/https-client.ts b/lambdas/https-client-lambda/src/services/delivery/https-client.ts new file mode 100644 index 00000000..41397928 --- /dev/null +++ b/lambdas/https-client-lambda/src/services/delivery/https-client.ts @@ -0,0 +1,85 @@ +import https from "node:https"; 
+import type { Agent } from "node:https"; +import type { CallbackTarget } from "@nhs-notify-client-callbacks/models"; +import { PERMANENT_TLS_ERROR_CODES } from "services/delivery/tls-agent-factory"; + +export type DeliveryResult = + | { ok: true } + | { ok: false; permanent: true } + | { + ok: false; + permanent: false; + statusCode: 429; + retryAfterHeader: string | undefined; + } + | { ok: false; permanent: false; statusCode: number }; + +export function deliverPayload( + target: CallbackTarget, + signedPayloadJson: string, + signatureHeader: string, + agent: Agent, +): Promise { + const requestTimeoutMs = Number(process.env.REQUEST_TIMEOUT_MS ?? "30000"); + + return new Promise((resolve) => { + const url = new URL(target.invocationEndpoint); + + const req = https.request( + url, + { + method: target.invocationMethod, + agent, + timeout: requestTimeoutMs, + headers: { + "Content-Type": "application/json", + "x-hmac-sha256-signature": signatureHeader, + [target.apiKey.headerName]: target.apiKey.headerValue, + }, + }, + (res) => { + res.resume(); + + const statusCode = res.statusCode ?? 
0; + + if (statusCode >= 200 && statusCode < 300) { + resolve({ ok: true }); + return; + } + + if (statusCode === 429) { + const retryAfterHeader = res.headers["retry-after"]; + resolve({ + ok: false, + permanent: false, + statusCode: 429, + retryAfterHeader, + }); + return; + } + + if (statusCode >= 400 && statusCode < 500) { + resolve({ ok: false, permanent: true }); + return; + } + + resolve({ ok: false, permanent: false, statusCode }); + }, + ); + + req.on("timeout", () => { + req.destroy(new Error("Request timed out")); + }); + + req.on("error", (error: NodeJS.ErrnoException) => { + if (error.code && PERMANENT_TLS_ERROR_CODES.has(error.code)) { + resolve({ ok: false, permanent: true }); + return; + } + + resolve({ ok: false, permanent: false, statusCode: 0 }); + }); + + req.end(signedPayloadJson); + }); +} diff --git a/lambdas/https-client-lambda/src/services/delivery/retry-policy.ts b/lambdas/https-client-lambda/src/services/delivery/retry-policy.ts new file mode 100644 index 00000000..65a719f5 --- /dev/null +++ b/lambdas/https-client-lambda/src/services/delivery/retry-policy.ts @@ -0,0 +1,79 @@ +import type { SQSRecord } from "aws-lambda"; +import { logger } from "services/logger"; +import { sendToDlq } from "services/dlq-sender"; +import { changeVisibility } from "services/sqs-visibility"; + +const BACKOFF_CAP_SECONDS = 300; +const SQS_MAX_VISIBILITY_SECONDS = 43_200; +const BASE_BACKOFF_MULTIPLIER = 5; +const BACKOFF_EXPONENT_BASE = 2; + +export function jitteredBackoffSeconds(receiveCount: number): number { + const ceiling = Math.min( + BASE_BACKOFF_MULTIPLIER * BACKOFF_EXPONENT_BASE ** (receiveCount - 1), + BACKOFF_CAP_SECONDS, + ); + // eslint-disable-next-line sonarjs/pseudo-random -- jitter for backoff, not security-sensitive + return Math.floor(Math.random() * ceiling); +} + +export function parseRetryAfter(header: string): number { + const asInt = Number(header); + + if (!Number.isNaN(asInt) && Number.isFinite(asInt)) { + return Math.max(0, 
Math.floor(asInt)); + } + + const date = new Date(header); + if (Number.isNaN(date.getTime())) { + return 0; + } + + return Math.max(0, Math.floor((date.getTime() - Date.now()) / 1000)); +} + +export function isWindowExhausted( + firstReceivedMs: number, + maxRetryDurationMs: number, +): boolean { + return Date.now() - firstReceivedMs >= maxRetryDurationMs; +} + +export function exceedsSqsMaxVisibility(retryAfterSeconds: number): boolean { + return retryAfterSeconds > SQS_MAX_VISIBILITY_SECONDS; +} + +export async function handleRateLimitedRecord( + record: SQSRecord, + clientId: string, + targetId: string, + retryAfterHeader: string | undefined, + receiveCount: number, +): Promise { + const retryAfterSeconds = retryAfterHeader + ? parseRetryAfter(retryAfterHeader) + : 0; + + if (exceedsSqsMaxVisibility(retryAfterSeconds)) { + logger.warn("429 Retry-After exceeds SQS max — sending to DLQ", { + clientId, + targetId, + retryAfterSeconds, + }); + await sendToDlq(record.body); + return; + } + + const delaySec = + retryAfterSeconds > 0 + ? 
retryAfterSeconds + : jitteredBackoffSeconds(receiveCount); + + logger.warn("Rate limited (429) — requeuing", { + clientId, + targetId, + delaySec, + }); + await changeVisibility(record.receiptHandle, delaySec); + throw new Error("Rate limited — requeue"); +} diff --git a/lambdas/https-client-lambda/src/services/delivery/tls-agent-factory.ts b/lambdas/https-client-lambda/src/services/delivery/tls-agent-factory.ts new file mode 100644 index 00000000..142ecd7a --- /dev/null +++ b/lambdas/https-client-lambda/src/services/delivery/tls-agent-factory.ts @@ -0,0 +1,200 @@ +import { Agent } from "node:https"; +import { X509Certificate, createHash } from "node:crypto"; +import { checkServerIdentity } from "node:tls"; +import type { PeerCertificate } from "node:tls"; +import forge from "node-forge"; +import { GetObjectCommand, S3Client } from "@aws-sdk/client-s3"; +import { + GetSecretValueCommand, + SecretsManagerClient, +} from "@aws-sdk/client-secrets-manager"; +import type { CallbackTarget } from "@nhs-notify-client-callbacks/models"; +import { logger } from "services/logger"; + +const { + MTLS_CERT_SECRET_ARN, + MTLS_TEST_CA_S3_KEY, + MTLS_TEST_CERT_S3_BUCKET, + MTLS_TEST_CERT_S3_KEY, +} = process.env; +const CERT_EXPIRY_THRESHOLD_MS = + Number(process.env.CERT_EXPIRY_THRESHOLD_MS) || 86_400_000; + +const s3Client = new S3Client({}); +const secretsClient = new SecretsManagerClient({}); + +export const PERMANENT_TLS_ERROR_CODES = new Set([ + "CERT_HAS_EXPIRED", + "DEPTH_ZERO_SELF_SIGNED_CERT", + "ERR_CERT_PINNING_FAILED", + "ERR_TLS_CERT_ALTNAME_INVALID", + "SELF_SIGNED_CERT_IN_CHAIN", + "UNABLE_TO_VERIFY_LEAF_SIGNATURE", +]); + +type CertMaterial = { + key: string; + cert: string; + ca?: string; + validTo: Date; +}; + +let cachedMaterial: CertMaterial | undefined; + +async function loadFromSecretsManager(): Promise<{ + key: string; + cert: string; +}> { + const response = await secretsClient.send( + new GetSecretValueCommand({ SecretId: MTLS_CERT_SECRET_ARN }), + ); + + 
if (!response.SecretString) { + throw new Error("mTLS cert secret has no value"); + } + + const parsed = JSON.parse(response.SecretString) as { + key: string; + cert: string; + }; + return { key: parsed.key, cert: parsed.cert }; +} + +async function loadS3Object(bucket: string, key: string): Promise { + const response = await s3Client.send( + new GetObjectCommand({ Bucket: bucket, Key: key }), + ); + + if (!response.Body) { + throw new Error(`S3 object s3://${bucket}/${key} has no body`); + } + + return response.Body.transformToString(); +} + +async function loadFromS3(): Promise<{ + key: string; + cert: string; + ca?: string; +}> { + if (!MTLS_TEST_CERT_S3_BUCKET || !MTLS_TEST_CERT_S3_KEY) { + throw new Error( + "MTLS_TEST_CERT_S3_BUCKET and MTLS_TEST_CERT_S3_KEY are required in non-production", + ); + } + + const pem = await loadS3Object( + MTLS_TEST_CERT_S3_BUCKET, + MTLS_TEST_CERT_S3_KEY, + ); + + const pemObjects = forge.pem.decode(pem); + const keyObj = pemObjects.find((obj) => obj.type.includes("PRIVATE KEY")); + const certObj = pemObjects.find((obj) => obj.type.includes("CERTIFICATE")); + const key = keyObj ? forge.pem.encode(keyObj) : ""; + const cert = certObj ? forge.pem.encode(certObj) : ""; + + let ca: string | undefined; + if (MTLS_TEST_CA_S3_KEY) { + ca = await loadS3Object(MTLS_TEST_CERT_S3_BUCKET, MTLS_TEST_CA_S3_KEY); + } + + return { key, cert, ca }; +} + +async function loadCertMaterial(): Promise { + const isProduction = Boolean(MTLS_CERT_SECRET_ARN); + const raw = isProduction + ? await loadFromSecretsManager() + : await loadFromS3(); + + const x509 = new X509Certificate(raw.cert); + const validTo = new Date(x509.validTo); + + logger.info("mTLS certificate loaded", { + source: isProduction ? "SecretsManager" : "S3", + validTo: validTo.toISOString(), + }); + + return { + key: raw.key, + cert: raw.cert, + ca: "ca" in raw ? 
(raw.ca as string | undefined) : undefined, + validTo, + }; +} + +function isExpiringSoon(material: CertMaterial): boolean { + return material.validTo.getTime() - Date.now() < CERT_EXPIRY_THRESHOLD_MS; +} + +async function getMaterial(): Promise { + if (cachedMaterial && !isExpiringSoon(cachedMaterial)) { + return cachedMaterial; + } + + cachedMaterial = await loadCertMaterial(); + return cachedMaterial; +} + +export async function buildAgent(target: CallbackTarget): Promise { + const agentOptions: Record = { + keepAlive: false, + }; + + if (target.mtls.enabled) { + const material = await getMaterial(); + agentOptions.key = material.key; + agentOptions.cert = material.cert; + + if (material.ca) { + agentOptions.ca = material.ca; + } + } + + if (target.certPinning.enabled) { + const expectedHash = target.certPinning.spkiHash; + + if (!expectedHash) { + throw new Error( + `certPinning.spkiHash is required when certPinning is enabled for target '${target.targetId}'`, + ); + } + + /* eslint-disable sonarjs/function-return-type -- checkServerIdentity requires Error|undefined return */ + agentOptions.checkServerIdentity = ( + hostname: string, + peerCert: PeerCertificate, + ) => { + const defaultResult = checkServerIdentity(hostname, peerCert); + if (defaultResult) { + return defaultResult; + } + + const rawDer = peerCert.raw; + const x509 = new X509Certificate(rawDer); + const spkiDer = x509.publicKey.export({ + type: "spki", + format: "der", + }) as Buffer; + const actualHash = createHash("sha256").update(spkiDer).digest("base64"); + + if (actualHash !== expectedHash) { + const error = new Error( + `Certificate pinning failed: expected SPKI hash '${expectedHash}', got '${actualHash}'`, + ); + (error as NodeJS.ErrnoException).code = "ERR_CERT_PINNING_FAILED"; + return error; + } + + return undefined; + }; + /* eslint-enable sonarjs/function-return-type */ + } + + return new Agent(agentOptions as ConstructorParameters[0]); +} + +export function resetCache(): void { + 
cachedMaterial = undefined; +} diff --git a/lambdas/https-client-lambda/src/services/dlq-sender.ts b/lambdas/https-client-lambda/src/services/dlq-sender.ts new file mode 100644 index 00000000..af61a666 --- /dev/null +++ b/lambdas/https-client-lambda/src/services/dlq-sender.ts @@ -0,0 +1,17 @@ +import { SQSClient, SendMessageCommand } from "@aws-sdk/client-sqs"; + +const sqsClient = new SQSClient({}); + +export async function sendToDlq(messageBody: string): Promise { + const { DLQ_URL } = process.env; + if (!DLQ_URL) { + throw new Error("DLQ_URL is required"); + } + + await sqsClient.send( + new SendMessageCommand({ + QueueUrl: DLQ_URL, + MessageBody: messageBody, + }), + ); +} diff --git a/lambdas/https-client-lambda/src/services/endpoint-gate.ts b/lambdas/https-client-lambda/src/services/endpoint-gate.ts new file mode 100644 index 00000000..73721246 --- /dev/null +++ b/lambdas/https-client-lambda/src/services/endpoint-gate.ts @@ -0,0 +1,179 @@ +import { type RedisClientType, createClient } from "@redis/client"; +import { createHash } from "node:crypto"; +import { logger } from "services/logger"; +import admitLuaSrc from "services/admit.lua"; +import recordResultLuaSrc from "services/record-result.lua"; + +export type AdmitResultAllowed = { + allowed: true; + probe: boolean; + effectiveRate: number; +}; + +export type AdmitResultDenied = { + allowed: false; + reason: "circuit_open" | "rate_limited"; + retryAfterMs: number; + effectiveRate: number; +}; + +export type AdmitResult = AdmitResultAllowed | AdmitResultDenied; + +export type RecordResultOutcome = + | { ok: true; state: "closed" } + | { ok: false; state: "opened" }; + +export type EndpointGateConfig = { + burstCapacity: number; + cbProbeIntervalMs: number; + decayPeriodMs: number; + cbWindowPeriodMs: number; + cbErrorThreshold: number; + cbMinAttempts: number; + cbCooldownMs: number; +}; + +let admitSha: string | undefined; +let recordResultSha: string | undefined; + +function computeSha1(script: string): 
string { + // eslint-disable-next-line sonarjs/hashing -- SHA-1 required by Redis EVALSHA protocol, not a security context + return createHash("sha1").update(script).digest("hex"); +} + +export async function admit( + client: RedisClientType, + targetId: string, + refillPerSec: number, + cbEnabled: boolean, + config: EndpointGateConfig, +): Promise { + const rlKey = `rl:${targetId}`; + const cbKey = `cb:${targetId}`; + const now = Date.now().toString(); + + /* eslint-disable sonarjs/null-dereference -- refillPerSec is typed as number, cannot be null */ + const args = [ + now, + refillPerSec.toString(), + config.burstCapacity.toString(), + config.cbProbeIntervalMs.toString(), + cbEnabled ? "1" : "0", + config.decayPeriodMs.toString(), + ]; + /* eslint-enable sonarjs/null-dereference */ + + let result: string; + + if (!admitSha) { + admitSha = computeSha1(admitLuaSrc); + } + + try { + result = await client.sendCommand([ + "EVALSHA", + admitSha, + "2", + rlKey, + cbKey, + ...args, + ]); + } catch (error: unknown) { + const isNoScript = + error instanceof Error && error.message.includes("NOSCRIPT"); + if (!isNoScript) { + throw error; + } + result = await client.sendCommand([ + "EVAL", + admitLuaSrc, + "2", + rlKey, + cbKey, + ...args, + ]); + } + + return JSON.parse(result) as AdmitResult; +} + +export async function recordResult( + client: RedisClientType, + targetId: string, + success: boolean, + config: EndpointGateConfig, +): Promise { + const cbKey = `cb:${targetId}`; + const now = Date.now().toString(); + + const args = [ + now, + success ? 
"1" : "0", + config.cbWindowPeriodMs.toString(), + config.cbErrorThreshold.toString(), + config.cbMinAttempts.toString(), + config.cbCooldownMs.toString(), + config.decayPeriodMs.toString(), + ]; + + let result: string; + + if (!recordResultSha) { + recordResultSha = computeSha1(recordResultLuaSrc); + } + + try { + result = await client.sendCommand([ + "EVALSHA", + recordResultSha, + "1", + cbKey, + ...args, + ]); + } catch (error: unknown) { + const isNoScript = + error instanceof Error && error.message.includes("NOSCRIPT"); + if (!isNoScript) { + throw error; + } + result = await client.sendCommand([ + "EVAL", + recordResultLuaSrc, + "1", + cbKey, + ...args, + ]); + } + + return JSON.parse(result) as RecordResultOutcome; +} + +export function resetAdmitSha(): void { + admitSha = undefined; + recordResultSha = undefined; +} + +let redisClient: RedisClientType | undefined; + +export async function getRedisClient(): Promise { + if (redisClient?.isOpen) { + return redisClient; + } + + const endpoint = process.env.ELASTICACHE_ENDPOINT; + if (!endpoint) { + throw new Error("ELASTICACHE_ENDPOINT is required"); + } + + redisClient = createClient({ url: `rediss://${endpoint}:6379` }); + redisClient.on("error", (err) => { + logger.error("Redis connection error", { error: String(err) }); + }); + + await redisClient.connect(); + return redisClient; +} + +export function resetRedisClient(): void { + redisClient = undefined; +} diff --git a/lambdas/https-client-lambda/src/services/logger.ts b/lambdas/https-client-lambda/src/services/logger.ts new file mode 100644 index 00000000..5c373b25 --- /dev/null +++ b/lambdas/https-client-lambda/src/services/logger.ts @@ -0,0 +1 @@ +export * from "@nhs-notify-client-callbacks/logger"; diff --git a/lambdas/client-transform-filter-lambda/src/services/payload-signer.ts b/lambdas/https-client-lambda/src/services/payload-signer.ts similarity index 100% rename from lambdas/client-transform-filter-lambda/src/services/payload-signer.ts rename to 
lambdas/https-client-lambda/src/services/payload-signer.ts index cf69cac8..e2174b76 100644 --- a/lambdas/client-transform-filter-lambda/src/services/payload-signer.ts +++ b/lambdas/https-client-lambda/src/services/payload-signer.ts @@ -2,9 +2,9 @@ import { createHmac } from "node:crypto"; import type { ClientCallbackPayload } from "@nhs-notify-client-callbacks/models"; export function signPayload( - payload: ClientCallbackPayload, applicationId: string, apiKey: string, + payload: ClientCallbackPayload, ): string { return createHmac("sha256", `${applicationId}.${apiKey}`) .update(JSON.stringify(payload)) diff --git a/lambdas/https-client-lambda/src/services/record-result.lua b/lambdas/https-client-lambda/src/services/record-result.lua new file mode 100644 index 00000000..89b81279 --- /dev/null +++ b/lambdas/https-client-lambda/src/services/record-result.lua @@ -0,0 +1,92 @@ +-- record-result.lua +-- Atomic two-window sliding circuit-breaker state update. +-- KEYS[1] = cb:{targetId} (circuit breaker hash) +-- ARGV[1] = now (epoch ms) +-- ARGV[2] = success ("1" or "0") +-- ARGV[3] = cbWindowPeriodMs +-- ARGV[4] = cbErrorThreshold (float, e.g. 
"0.5") +-- ARGV[5] = cbMinAttempts (integer) +-- ARGV[6] = cbCooldownMs +-- ARGV[7] = decayPeriodMs + +local cb_key = KEYS[1] +local now = tonumber(ARGV[1]) +local success = ARGV[2] == "1" +local windowPeriodMs = tonumber(ARGV[3]) +local errorThreshold = tonumber(ARGV[4]) +local minAttempts = tonumber(ARGV[5]) +local cooldownMs = tonumber(ARGV[6]) +local decayPeriodMs = tonumber(ARGV[7]) + +-- Load current state +local opened_until_ms = tonumber(redis.call("HGET", cb_key, "opened_until_ms") or "0") or 0 +local cb_window_from = tonumber(redis.call("HGET", cb_key, "cb_window_from") or "0") or 0 +local cb_failures = tonumber(redis.call("HGET", cb_key, "cb_failures") or "0") or 0 +local cb_attempts = tonumber(redis.call("HGET", cb_key, "cb_attempts") or "0") or 0 +local cb_prev_failures = tonumber(redis.call("HGET", cb_key, "cb_prev_failures") or "0") or 0 +local cb_prev_attempts = tonumber(redis.call("HGET", cb_key, "cb_prev_attempts") or "0") or 0 + +-- Initialise window if not set +if cb_window_from == 0 then + cb_window_from = now +end + +-- Check for window expiry and roll +if (now - cb_window_from) >= windowPeriodMs then + cb_prev_failures = cb_failures + cb_prev_attempts = cb_attempts + cb_failures = 0 + cb_attempts = 0 + cb_window_from = now +end + +-- Increment counters +cb_attempts = cb_attempts + 1 +if not success then + cb_failures = cb_failures + 1 +end + +-- Compute two-window blended error rate +local elapsed_in_window = now - cb_window_from +local prev_weight = 0 +if windowPeriodMs > 0 and elapsed_in_window < windowPeriodMs then + prev_weight = 1 - (elapsed_in_window / windowPeriodMs) +end + +local blended_failures = cb_prev_failures * prev_weight + cb_failures +local blended_attempts = cb_prev_attempts * prev_weight + cb_attempts + +local state = "closed" + +-- Check if we should open the circuit +if blended_attempts >= minAttempts and blended_attempts > 0 then + local error_rate = blended_failures / blended_attempts + if error_rate >= errorThreshold 
then + opened_until_ms = now + cooldownMs + state = "opened" + end +end + +-- During active decay, preserve opened_until_ms as decay start marker +if opened_until_ms > 0 and now >= opened_until_ms then + local elapsed_since_close = now - opened_until_ms + if elapsed_since_close >= decayPeriodMs then + opened_until_ms = 0 + end +end + +-- Write updated state +redis.call("HSET", cb_key, + "opened_until_ms", tostring(opened_until_ms), + "cb_window_from", tostring(cb_window_from), + "cb_failures", tostring(cb_failures), + "cb_attempts", tostring(cb_attempts), + "cb_prev_failures", tostring(cb_prev_failures), + "cb_prev_attempts", tostring(cb_prev_attempts) +) + +if state == "opened" then + return cjson.encode({ ok = false, state = "opened" }) +end + +return cjson.encode({ ok = true, state = "closed" }) diff --git a/lambdas/https-client-lambda/src/services/sqs-visibility.ts b/lambdas/https-client-lambda/src/services/sqs-visibility.ts new file mode 100644 index 00000000..e6fe2720 --- /dev/null +++ b/lambdas/https-client-lambda/src/services/sqs-visibility.ts @@ -0,0 +1,21 @@ +import { ChangeMessageVisibilityCommand, SQSClient } from "@aws-sdk/client-sqs"; + +const sqsClient = new SQSClient({}); + +export async function changeVisibility( + receiptHandle: string, + visibilityTimeoutSeconds: number, +): Promise { + const { QUEUE_URL } = process.env; + if (!QUEUE_URL) { + throw new Error("QUEUE_URL is required"); + } + + await sqsClient.send( + new ChangeMessageVisibilityCommand({ + QueueUrl: QUEUE_URL, + ReceiptHandle: receiptHandle, + VisibilityTimeout: Math.floor(visibilityTimeoutSeconds), + }), + ); +} diff --git a/lambdas/https-client-lambda/src/services/ssm-applications-map.ts b/lambdas/https-client-lambda/src/services/ssm-applications-map.ts new file mode 100644 index 00000000..73f3ea61 --- /dev/null +++ b/lambdas/https-client-lambda/src/services/ssm-applications-map.ts @@ -0,0 +1,62 @@ +import { GetParameterCommand, SSMClient } from "@aws-sdk/client-ssm"; +import { 
logger } from "services/logger"; + +const ssmClient = new SSMClient({}); + +let cachedMap: Map | undefined; + +async function loadMap(): Promise> { + if (cachedMap) { + return cachedMap; + } + + const { APPLICATIONS_MAP_PARAMETER } = process.env; + if (!APPLICATIONS_MAP_PARAMETER) { + throw new Error("APPLICATIONS_MAP_PARAMETER is required"); + } + + const response = await ssmClient.send( + new GetParameterCommand({ + Name: APPLICATIONS_MAP_PARAMETER, + WithDecryption: true, + }), + ); + + if (!response.Parameter?.Value) { + throw new Error( + `SSM parameter '${APPLICATIONS_MAP_PARAMETER}' not found or has no value`, + ); + } + + let parsed: Record; + try { + parsed = JSON.parse(response.Parameter.Value) as Record; + } catch { + throw new Error( + `SSM parameter '${APPLICATIONS_MAP_PARAMETER}' contains invalid JSON`, + ); + } + + cachedMap = new Map(Object.entries(parsed)); + logger.info("Applications map loaded from SSM", { + parameterName: APPLICATIONS_MAP_PARAMETER, + }); + return cachedMap; +} + +export async function getApplicationId(clientId: string): Promise { + const map = await loadMap(); + const applicationId = map.get(clientId); + + if (!applicationId) { + throw new Error( + `No applicationId found for clientId '${clientId}' in SSM map`, + ); + } + + return applicationId; +} + +export function resetCache(): void { + cachedMap = undefined; +} diff --git a/lambdas/https-client-lambda/tsconfig.json b/lambdas/https-client-lambda/tsconfig.json new file mode 100644 index 00000000..a50e6fc0 --- /dev/null +++ b/lambdas/https-client-lambda/tsconfig.json @@ -0,0 +1,14 @@ +{ + "compilerOptions": { + "isolatedModules": true, + "paths": { + "*": [ + "./src/*" + ] + } + }, + "extends": "../../tsconfig.base.json", + "include": [ + "src/**/*" + ] +} diff --git a/lambdas/mock-webhook-lambda/src/__tests__/index.test.ts b/lambdas/mock-webhook-lambda/src/__tests__/index.test.ts index 6f3cc917..d7463722 100644 --- a/lambdas/mock-webhook-lambda/src/__tests__/index.test.ts +++ 
b/lambdas/mock-webhook-lambda/src/__tests__/index.test.ts @@ -1,6 +1,14 @@ +import { X509Certificate } from "node:crypto"; import type { APIGatewayProxyEvent } from "aws-lambda"; import { handler } from "index"; +jest.mock("node:crypto", () => ({ + ...jest.requireActual("node:crypto"), + X509Certificate: jest.fn(), +})); + +const mockX509Certificate = X509Certificate as unknown as jest.Mock; + const TEST_API_KEY = "test-api-key"; jest.mock("@nhs-notify-client-callbacks/logger", () => { @@ -32,6 +40,28 @@ const createMockEvent = ( ): APIGatewayProxyEvent => ({ body, headers, rawPath }) as unknown as APIGatewayProxyEvent; +const createAlbEvent = ( + body: string | null, + headers: Record = DEFAULT_HEADERS, + extraHeaders: Record = {}, +): APIGatewayProxyEvent => + ({ + body, + path: "/target-abc", + httpMethod: "POST", + headers: { ...headers, ...extraHeaders }, + requestContext: { + elb: { + targetGroupArn: + "arn:aws:elasticloadbalancing:eu-west-2:123456789012:targetgroup/mock/abc", + }, + }, + }) as unknown as APIGatewayProxyEvent; + +const FAKE_CERT_HEADER = encodeURIComponent( + "-----BEGIN CERTIFICATE-----\nZmFrZQ==\n-----END CERTIFICATE-----", +); + describe("Mock Webhook Lambda", () => { beforeAll(() => { process.env.API_KEY = TEST_API_KEY; @@ -381,3 +411,144 @@ describe("Mock Webhook Lambda", () => { }); }); }); + +describe("ALB mTLS certificate logging", () => { + beforeAll(() => { + process.env.API_KEY = TEST_API_KEY; + }); + + afterAll(() => { + delete process.env.API_KEY; + }); + + beforeEach(() => { + mockX509Certificate.mockReset(); + mockX509Certificate.mockImplementation(() => ({ + validFrom: new Date(Date.now() - 86_400_000).toString(), + validTo: new Date(Date.now() + 86_400_000).toString(), + })); + }); + + it("logs isMtls=false and proceeds when ALB invocation has no client certificate header", async () => { + const event = createAlbEvent(JSON.stringify({ data: [] })); + const result = await handler(event); + + 
expect(result.statusCode).not.toBe(401); + expect(mockLogger.info).toHaveBeenCalledWith( + "Mock webhook invoked without mTLS", + expect.objectContaining({ isMtls: false }), + ); + }); + + it("logs isMtls=false and proceeds when client certificate header cannot be parsed", async () => { + mockX509Certificate.mockImplementationOnce(() => { + throw new Error("Invalid certificate"); + }); + const event = createAlbEvent( + JSON.stringify({ data: [] }), + DEFAULT_HEADERS, + { "x-amzn-mtls-clientcert": FAKE_CERT_HEADER }, + ); + const result = await handler(event); + + expect(result.statusCode).not.toBe(401); + expect(mockLogger.info).toHaveBeenCalledWith( + "Mock webhook invoked without mTLS", + expect.objectContaining({ isMtls: false }), + ); + }); + + it("logs isMtls=false and proceeds when client certificate is expired", async () => { + mockX509Certificate.mockImplementationOnce(() => ({ + validFrom: new Date(Date.now() - 172_800_000).toString(), + validTo: new Date(Date.now() - 86_400_000).toString(), + })); + const event = createAlbEvent( + JSON.stringify({ data: [] }), + DEFAULT_HEADERS, + { "x-amzn-mtls-clientcert": FAKE_CERT_HEADER }, + ); + const result = await handler(event); + + expect(result.statusCode).not.toBe(401); + expect(mockLogger.info).toHaveBeenCalledWith( + "Mock webhook invoked without mTLS", + expect.objectContaining({ isMtls: false }), + ); + }); + + it("logs isMtls=true and proceeds when certificate is valid", async () => { + const event = createAlbEvent( + JSON.stringify({ data: [] }), + { "x-api-key": "wrong-key" }, + { "x-amzn-mtls-clientcert": FAKE_CERT_HEADER }, + ); + const result = await handler(event); + + expect(mockLogger.info).toHaveBeenCalledWith( + "mTLS client certificate verified", + expect.objectContaining({ isMtls: true }), + ); + expect(result.statusCode).toBe(401); + const body = JSON.parse(result.body); + expect(body.message).toBe("Unauthorized"); + }); + + it("processes request successfully when certificate is valid and API 
key is correct", async () => { + const callback = { + data: [ + { + type: "MessageStatus", + attributes: { + messageId: "msg-alb-mtls", + messageReference: "ref-alb", + messageStatus: "delivered", + timestamp: "2026-01-01T00:00:00Z", + }, + links: { message: "some-link" }, + meta: { idempotencyKey: "idem-key-alb" }, + }, + ], + }; + const event = createAlbEvent(JSON.stringify(callback), DEFAULT_HEADERS, { + "x-amzn-mtls-clientcert": FAKE_CERT_HEADER, + }); + const result = await handler(event); + + expect(result.statusCode).toBe(200); + const body = JSON.parse(result.body); + expect(body.message).toBe("Callback received"); + }); + + it("processes non-mTLS ALB request successfully when API key is correct", async () => { + const callback = { + data: [ + { + type: "MessageStatus", + attributes: { + messageId: "msg-alb-no-mtls", + messageReference: "ref-alb", + messageStatus: "delivered", + timestamp: "2026-01-01T00:00:00Z", + }, + links: { message: "some-link" }, + meta: { idempotencyKey: "idem-key-alb-no-mtls" }, + }, + ], + }; + const event = createAlbEvent(JSON.stringify(callback), DEFAULT_HEADERS); + const result = await handler(event); + + expect(result.statusCode).toBe(200); + const body = JSON.parse(result.body); + expect(body.message).toBe("Callback received"); + }); + + it("non-ALB invocations skip certificate check", async () => { + const event = createMockEvent(JSON.stringify({ data: [] })); + const result = await handler(event); + + const body = JSON.parse(result.body); + expect(body.message).not.toBe("Mutual TLS authentication required"); + }); +}); diff --git a/lambdas/mock-webhook-lambda/src/index.ts b/lambdas/mock-webhook-lambda/src/index.ts index 081ef3b9..5d68e279 100644 --- a/lambdas/mock-webhook-lambda/src/index.ts +++ b/lambdas/mock-webhook-lambda/src/index.ts @@ -1,9 +1,33 @@ +import { X509Certificate } from "node:crypto"; import type { APIGatewayProxyEvent, APIGatewayProxyResult } from "aws-lambda"; import { Logger } from 
"@nhs-notify-client-callbacks/logger"; import type { ClientCallbackPayload } from "@nhs-notify-client-callbacks/models"; const logger = new Logger(); +function verifyClientCertificate(certHeader: string | undefined): { + valid: boolean; + reason?: string; +} { + if (!certHeader) { + return { valid: false, reason: "No client certificate provided" }; + } + try { + const pem = decodeURIComponent(certHeader); + const cert = new X509Certificate(pem); + const now = new Date(); + if (now < new Date(cert.validFrom) || now > new Date(cert.validTo)) { + return { + valid: false, + reason: "Client certificate is not within its validity period", + }; + } + return { valid: true }; + } catch { + return { valid: false, reason: "Failed to parse client certificate" }; + } +} + function isClientCallbackPayload( value: unknown, ): value is ClientCallbackPayload { @@ -36,20 +60,44 @@ function isClientCallbackPayload( async function buildResponse( event: APIGatewayProxyEvent, ): Promise { - const eventWithFunctionUrlFields = event as APIGatewayProxyEvent & { + const eventWithContextFields = event as APIGatewayProxyEvent & { rawPath?: string; - requestContext?: { http?: { method?: string } }; + requestContext?: { + http?: { method?: string }; + elb?: { targetGroupArn: string }; + }; }; const headers = Object.fromEntries( Object.entries(event.headers).map(([k, v]) => [String(k).toLowerCase(), v]), ) as Record; - const path = event.path ?? eventWithFunctionUrlFields.rawPath; + const path = event.path ?? eventWithContextFields.rawPath; + + const isAlbInvocation = Boolean(eventWithContextFields.requestContext?.elb); + let isMtls = false; + if (isAlbInvocation) { + const certResult = verifyClientCertificate( + headers["x-amzn-mtls-clientcert"], + ); + isMtls = certResult.valid; + if (isMtls) { + logger.info("mTLS client certificate verified", { + fingerprint: headers["x-amzn-mtls-clientcert-fingerprint"] ?? 
"", + isMtls: true, + }); + } else { + logger.info("Mock webhook invoked without mTLS", { + isMtls: false, + reason: certResult.reason, + }); + } + } logger.info("Mock webhook invoked", { path, method: event.httpMethod, hasBody: Boolean(event.body), + isMtls, "x-api-key": headers["x-api-key"], "x-hmac-sha256-signature": headers["x-hmac-sha256-signature"], payload: event.body, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 753d95da..3dde1cbc 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -6,6 +6,9 @@ settings: catalogs: app: + '@redis/client': + specifier: ^1.5.14 + version: 1.6.1 async-wait-until: specifier: ^2.0.31 version: 2.0.31 @@ -15,9 +18,15 @@ catalogs: cloudevents: specifier: ^10.0.0 version: 10.0.0 + node-forge: + specifier: ^1.3.1 + version: 1.4.0 p-map: specifier: ^4.0.0 version: 4.0.0 + picocolors: + specifier: ^1.1.1 + version: 1.1.1 pino: specifier: ^10.3.1 version: 10.3.1 @@ -40,6 +49,9 @@ catalogs: '@aws-sdk/client-s3': specifier: ^3.1024.0 version: 3.1029.0 + '@aws-sdk/client-secrets-manager': + specifier: ^3.1023.0 + version: 3.1029.0 '@aws-sdk/client-sqs': specifier: ^3.1023.0 version: 3.1026.0 @@ -136,6 +148,9 @@ catalogs: '@types/node': specifier: ^24.12.0 version: 24.12.0 + '@types/node-forge': + specifier: ^1.3.11 + version: 1.3.14 '@types/yargs': specifier: ^17.0.24 version: 17.0.35 @@ -264,9 +279,9 @@ importers: '@aws-sdk/client-s3': specifier: catalog:aws version: 3.1029.0 - '@aws-sdk/client-ssm': - specifier: catalog:aws - version: 3.1029.0 + '@nhs-notify-client-callbacks/config-cache': + specifier: workspace:* + version: link:../../src/config-cache '@nhs-notify-client-callbacks/logger': specifier: workspace:* version: link:../../src/logger @@ -311,6 +326,70 @@ importers: specifier: catalog:tools version: 5.9.3 + lambdas/https-client-lambda: + dependencies: + '@aws-sdk/client-s3': + specifier: catalog:aws + version: 3.1029.0 + '@aws-sdk/client-secrets-manager': + specifier: catalog:aws + version: 3.1029.0 + '@aws-sdk/client-sqs': 
+ specifier: catalog:aws + version: 3.1026.0 + '@aws-sdk/client-ssm': + specifier: catalog:aws + version: 3.1029.0 + '@nhs-notify-client-callbacks/config-cache': + specifier: workspace:* + version: link:../../src/config-cache + '@nhs-notify-client-callbacks/logger': + specifier: workspace:* + version: link:../../src/logger + '@nhs-notify-client-callbacks/models': + specifier: workspace:* + version: link:../../src/models + '@redis/client': + specifier: catalog:app + version: 1.6.1 + aws-embedded-metrics: + specifier: catalog:app + version: 4.2.1 + esbuild: + specifier: catalog:tools + version: 0.28.0 + node-forge: + specifier: catalog:app + version: 1.4.0 + p-map: + specifier: catalog:app + version: 4.0.0 + devDependencies: + '@tsconfig/node22': + specifier: catalog:tools + version: 22.0.5 + '@types/aws-lambda': + specifier: catalog:tools + version: 8.10.161 + '@types/jest': + specifier: catalog:test + version: 30.0.0 + '@types/node': + specifier: catalog:tools + version: 24.12.0 + '@types/node-forge': + specifier: catalog:tools + version: 1.3.14 + eslint: + specifier: catalog:lint + version: 9.39.4(jiti@2.6.1) + jest: + specifier: catalog:test + version: 30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)) + typescript: + specifier: catalog:tools + version: 5.9.3 + lambdas/mock-webhook-lambda: dependencies: '@nhs-notify-client-callbacks/logger': @@ -351,6 +430,34 @@ importers: specifier: catalog:tools version: 5.9.3 + src/config-cache: + dependencies: + '@nhs-notify-client-callbacks/models': + specifier: workspace:* + version: link:../models + devDependencies: + '@tsconfig/node22': + specifier: catalog:tools + version: 22.0.5 + '@types/jest': + specifier: catalog:test + version: 30.0.0 + '@types/node': + specifier: catalog:tools + version: 24.12.0 + eslint: + specifier: catalog:lint + version: 9.39.4(jiti@2.6.1) + jest: + specifier: catalog:test + version: 30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)) 
+ ts-jest: + specifier: catalog:test + version: 29.4.9(@babel/core@7.29.0)(@jest/transform@30.3.0)(@jest/types@30.3.0)(babel-jest@30.3.0(@babel/core@7.29.0))(esbuild@0.28.0)(jest-util@30.3.0)(jest@30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)))(typescript@5.9.3) + typescript: + specifier: catalog:tools + version: 5.9.3 + src/logger: dependencies: pino: @@ -529,6 +636,9 @@ importers: '@nhs-notify-client-callbacks/models': specifier: workspace:* version: link:../../src/models + picocolors: + specifier: catalog:app + version: 1.1.1 table: specifier: catalog:app version: 6.9.0 @@ -605,6 +715,10 @@ packages: resolution: {integrity: sha512-OuA8RZTxsAaHDcI25j2NGLMaYFI2WpJdDzK3uLmVBmaHwjQKQZOUDVVBcln8pNo3IgkY+HRSJhRR4/xlM//UyQ==} engines: {node: '>=20.0.0'} + '@aws-sdk/client-secrets-manager@3.1029.0': + resolution: {integrity: sha512-OtNiJSEXA8+KkFA1aS24BOFkJoRlxwJ8tBLiUUYKVwLu8L3Smfz2oj4BJwRlv0FzWTqrmJkFC8kly/cAZqU2UQ==} + engines: {node: '>=20.0.0'} + '@aws-sdk/client-sqs@3.1026.0': resolution: {integrity: sha512-b7z2WI1tqObk4U7vUbmBfXIeFhxKbFr7xQ4rWi879iFl5aSPvpd1WAmLi6z1boVKTEwEqHALuE5MyGBHhOCy5A==} engines: {node: '>=20.0.0'} @@ -1697,6 +1811,10 @@ packages: resolution: {integrity: sha512-QNqXyfVS2wm9hweSYD2O7F0G06uurj9kZ96TRQE5Y9hU7+tgdZwIkbAKc5Ocy1HxEY2kuDQa6cQ1WRs/O5LFKA==} engines: {node: ^12.20.0 || ^14.18.0 || >=16.0.0} + '@redis/client@1.6.1': + resolution: {integrity: sha512-/KCsg3xSlR+nCK8/8ZYSknYxvXHwubJrU82F3Lm1Fp6789VQ0/3RJKfsmRXjqfaTA++23CvC3hqmqe/2GEt6Kw==} + engines: {node: '>=14'} + '@rtsao/scc@1.1.0': resolution: {integrity: sha512-zt6OdqaDoOnJ1ZYsCYGt9YmWzDXl4vQdKTyJev62gFhRGKdx7mcT54V9KIjg+d2wi9EXsPvAPKe7i7WjfVWB8g==} @@ -1985,6 +2103,9 @@ packages: '@types/json5@0.0.29': resolution: {integrity: sha512-dRLjCWHYg4oaA77cxO64oO+7JwCwnIzkZPdrrC71jQmQtlhM556pwKo5bUzqvZndkVbeFLIIi+9TC40JNF5hNQ==} + '@types/node-forge@1.3.14': + resolution: {integrity: 
sha512-mhVF2BnD4BO+jtOp7z1CdzaK4mbuK0LLQYAvdOLqHTavxFNq4zA1EmYkpnFjP8HOUzedfQkRnp0E2ulSAYSzAw==} + '@types/node@24.12.0': resolution: {integrity: sha512-GYDxsZi3ChgmckRT9HPU0WEhKLP08ev/Yfcq2AstjrDASOYCSXeyjDsHg4v5t4jOj7cyDX3vmprafKlWIG9MXQ==} @@ -2453,6 +2574,10 @@ packages: resolution: {integrity: sha512-uyzC+PpMMRawbouHO+3mlisr3QfEDObmo2pN4oTTF6dZncZgpIzdasZx0tRBFI1dMsqCLZZXMtz8cUuvYqHdbw==} engines: {node: '>=20 <=24'} + cluster-key-slot@1.1.2: + resolution: {integrity: sha512-RMr0FhtfXemyinomL4hrWcYJxmX6deFdCxpJzhDttxgO1+bcCnkk+9drydLVDmAMG7NE6aN/fl4F7ucU/90gAA==} + engines: {node: '>=0.10.0'} + co@4.6.0: resolution: {integrity: sha512-QVb0dM5HvG+uaxitm8wONl7jltx8dqhfU33DcqtOZcLSVIKSDDLDi7+0LbAKiyI8hD9u42m2YxXSkMGWThaecQ==} engines: {iojs: '>= 1.0.0', node: '>= 0.12.0'} @@ -3024,6 +3149,10 @@ packages: resolution: {integrity: sha512-SFdFmIJi+ybC0vjlHN0ZGVGHc3lgE0DxPAT0djjVg+kjOnSqclqmj0KQ7ykTOLP6YxoqOvuAODGdcHJn+43q3g==} engines: {node: '>= 0.4'} + generic-pool@3.9.0: + resolution: {integrity: sha512-hymDOu5B53XvN4QT9dBmZxPX4CWhBPPLguTZ9MMFeFa/Kg0xWVfylOVNlJji/E7yTZWFd/q9GO5TxDLq156D7g==} + engines: {node: '>= 4'} + gensync@1.0.0-beta.2: resolution: {integrity: sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==} engines: {node: '>=6.9.0'} @@ -3675,6 +3804,10 @@ packages: resolution: {integrity: sha512-pyFS63ptit/P5WqUkt+UUfe+4oevH+bFeIiPPdfb0pFeYEu/1ELnJu5l+5EcTKYL5M7zaAa7S8ddywgXypqKCw==} engines: {node: '>= 0.4'} + node-forge@1.4.0: + resolution: {integrity: sha512-LarFH0+6VfriEhqMMcLX2F7SwSXeWwnEAJEsYm5QKWchiVYVvJyV9v7UDvUv+w5HO23ZpQTXDv/GxdDdMyOuoQ==} + engines: {node: '>= 6.13.0'} + node-int64@0.4.0: resolution: {integrity: sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw==} @@ -4418,6 +4551,9 @@ packages: yallist@3.1.1: resolution: {integrity: sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==} + yallist@4.0.0: + 
resolution: {integrity: sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==} + yaml@2.8.3: resolution: {integrity: sha512-AvbaCLOO2Otw/lW5bmh9d/WEdcDFdQp2Z2ZUH3pX9U2ihyUY0nvLv7J6TrWowklRGPYbB/IuIMfYgxaCPg5Bpg==} engines: {node: '>= 14.6'} @@ -4702,6 +4838,50 @@ snapshots: transitivePeerDependencies: - aws-crt + '@aws-sdk/client-secrets-manager@3.1029.0': + dependencies: + '@aws-crypto/sha256-browser': 5.2.0 + '@aws-crypto/sha256-js': 5.2.0 + '@aws-sdk/core': 3.973.27 + '@aws-sdk/credential-provider-node': 3.972.30 + '@aws-sdk/middleware-host-header': 3.972.9 + '@aws-sdk/middleware-logger': 3.972.9 + '@aws-sdk/middleware-recursion-detection': 3.972.10 + '@aws-sdk/middleware-user-agent': 3.972.29 + '@aws-sdk/region-config-resolver': 3.972.11 + '@aws-sdk/types': 3.973.7 + '@aws-sdk/util-endpoints': 3.996.6 + '@aws-sdk/util-user-agent-browser': 3.972.9 + '@aws-sdk/util-user-agent-node': 3.973.15 + '@smithy/config-resolver': 4.4.14 + '@smithy/core': 3.23.14 + '@smithy/fetch-http-handler': 5.3.16 + '@smithy/hash-node': 4.2.13 + '@smithy/invalid-dependency': 4.2.13 + '@smithy/middleware-content-length': 4.2.13 + '@smithy/middleware-endpoint': 4.4.29 + '@smithy/middleware-retry': 4.5.0 + '@smithy/middleware-serde': 4.2.17 + '@smithy/middleware-stack': 4.2.13 + '@smithy/node-config-provider': 4.3.13 + '@smithy/node-http-handler': 4.5.2 + '@smithy/protocol-http': 5.3.13 + '@smithy/smithy-client': 4.12.9 + '@smithy/types': 4.14.0 + '@smithy/url-parser': 4.2.13 + '@smithy/util-base64': 4.3.2 + '@smithy/util-body-length-browser': 4.2.2 + '@smithy/util-body-length-node': 4.2.3 + '@smithy/util-defaults-mode-browser': 4.3.45 + '@smithy/util-defaults-mode-node': 4.2.49 + '@smithy/util-endpoints': 3.3.4 + '@smithy/util-middleware': 4.2.13 + '@smithy/util-retry': 4.3.0 + '@smithy/util-utf8': 4.2.2 + tslib: 2.8.1 + transitivePeerDependencies: + - aws-crt + '@aws-sdk/client-sqs@3.1026.0': dependencies: '@aws-crypto/sha256-browser': 5.2.0 @@ 
-6068,6 +6248,12 @@ snapshots: '@pkgr/core@0.2.9': {} + '@redis/client@1.6.1': + dependencies: + cluster-key-slot: 1.1.2 + generic-pool: 3.9.0 + yallist: 4.0.0 + '@rtsao/scc@1.1.0': {} '@sinclair/typebox@0.34.49': {} @@ -6495,6 +6681,10 @@ snapshots: '@types/json5@0.0.29': {} + '@types/node-forge@1.3.14': + dependencies: + '@types/node': 25.5.0 + '@types/node@24.12.0': dependencies: undici-types: 7.16.0 @@ -6984,6 +7174,8 @@ snapshots: util: 0.12.5 uuid: 8.3.2 + cluster-key-slot@1.1.2: {} + co@4.6.0: {} collect-v8-coverage@1.0.3: {} @@ -7768,6 +7960,8 @@ snapshots: generator-function@2.0.1: {} + generic-pool@3.9.0: {} + gensync@1.0.0-beta.2: {} get-caller-file@2.0.5: {} @@ -8719,6 +8913,8 @@ snapshots: object.entries: 1.1.9 semver: 6.3.1 + node-forge@1.4.0: {} + node-int64@0.4.0: {} node-releases@2.0.36: {} @@ -9639,6 +9835,8 @@ snapshots: yallist@3.1.1: {} + yallist@4.0.0: {} + yaml@2.8.3: {} yargs-parser@20.2.9: {} diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index cc1bdeb4..f8f8a39b 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -8,10 +8,13 @@ blockExoticSubdeps: true catalogs: app: + "@redis/client": "^1.5.14" async-wait-until: "^2.0.31" aws-embedded-metrics: "^4.2.1" cloudevents: "^10.0.0" + node-forge: "^1.3.1" p-map: "^4.0.0" + picocolors: "^1.1.1" pino: "^10.3.1" table: "^6.9.0" yargs: "^17.7.2" @@ -20,6 +23,7 @@ catalogs: "@aws-sdk/client-cloudwatch": "^3.1025.0" "@aws-sdk/client-cloudwatch-logs": "^3.1023.0" "@aws-sdk/client-s3": "^3.1024.0" + "@aws-sdk/client-secrets-manager": "^3.1023.0" "@aws-sdk/client-sqs": "^3.1023.0" "@aws-sdk/client-ssm": "^3.1025.0" "@aws-sdk/client-sts": "^3.1023.0" @@ -54,6 +58,7 @@ catalogs: "@tsconfig/node22": "^22.0.5" "@types/aws-lambda": "^8.10.161" "@types/node": "^24.12.0" + "@types/node-forge": "^1.3.11" "@types/yargs": "^17.0.24" esbuild: "^0.28.0" knip: "^6.3.1" diff --git a/scripts/config/pre-commit.yaml b/scripts/config/pre-commit.yaml index a7619797..1e1da873 100644 --- 
a/scripts/config/pre-commit.yaml +++ b/scripts/config/pre-commit.yaml @@ -8,6 +8,7 @@ repos: - id: check-added-large-files - id: check-symlinks - id: detect-private-key + exclude: 'lambdas/https-client-lambda/src/__tests__/tls-agent-factory\.test\.ts' - id: end-of-file-fixer - id: forbid-new-submodules - id: mixed-line-ending diff --git a/src/config-cache/jest.config.ts b/src/config-cache/jest.config.ts new file mode 100644 index 00000000..6ecf333b --- /dev/null +++ b/src/config-cache/jest.config.ts @@ -0,0 +1,14 @@ +import { nodeJestConfig } from "../../jest.config.base.ts"; + +export default { + ...nodeJestConfig, + coverageThreshold: { + global: { + ...nodeJestConfig.coverageThreshold?.global, + branches: 100, + functions: 100, + lines: 100, + statements: 100, + }, + }, +}; diff --git a/src/config-cache/package.json b/src/config-cache/package.json new file mode 100644 index 00000000..61bf815f --- /dev/null +++ b/src/config-cache/package.json @@ -0,0 +1,32 @@ +{ + "exports": { + ".": { + "types": "./src/index.ts", + "default": "./src/index.ts" + } + }, + "dependencies": { + "@nhs-notify-client-callbacks/models": "workspace:*" + }, + "devDependencies": { + "@tsconfig/node22": "catalog:tools", + "@types/jest": "catalog:test", + "@types/node": "catalog:tools", + "eslint": "catalog:lint", + "jest": "catalog:test", + "ts-jest": "catalog:test", + "typescript": "catalog:tools" + }, + "engines": { + "node": ">=24.14.1" + }, + "name": "@nhs-notify-client-callbacks/config-cache", + "private": true, + "scripts": { + "lint": "eslint .", + "lint:fix": "eslint . 
--fix", + "test:unit": "jest", + "typecheck": "tsc --noEmit" + }, + "version": "0.0.1" +} diff --git a/src/config-cache/src/__tests__/config-cache.test.ts b/src/config-cache/src/__tests__/config-cache.test.ts new file mode 100644 index 00000000..179a178a --- /dev/null +++ b/src/config-cache/src/__tests__/config-cache.test.ts @@ -0,0 +1,75 @@ +import type { ClientSubscriptionConfiguration } from "@nhs-notify-client-callbacks/models"; +import { ConfigCache } from "config-cache"; + +const createConfig = (clientId: string): ClientSubscriptionConfiguration => ({ + clientId, + subscriptions: [], + targets: [], +}); + +describe("ConfigCache", () => { + it("stores and retrieves configuration", () => { + const cache = new ConfigCache(60_000); + const config = createConfig("client-1"); + + cache.set("client-1", config); + + expect(cache.get("client-1")).toEqual(config); + }); + + it("returns undefined for non-existent key", () => { + const cache = new ConfigCache(60_000); + + expect(cache.get("non-existent")).toBeUndefined(); + }); + + it("returns cached value without re-fetch when within TTL", () => { + jest.useFakeTimers(); + jest.setSystemTime(new Date("2026-01-01T10:00:00Z")); + + const cache = new ConfigCache(5000); + const config = createConfig("client-1"); + + cache.set("client-1", config); + + jest.advanceTimersByTime(4999); + + expect(cache.get("client-1")).toEqual(config); + + jest.useRealTimers(); + }); + + it("returns undefined for expired entries after TTL", () => { + jest.useFakeTimers(); + jest.setSystemTime(new Date("2026-01-01T10:00:00Z")); + + const cache = new ConfigCache(1000); + const config = createConfig("client-1"); + + cache.set("client-1", config); + expect(cache.get("client-1")).toEqual(config); + + jest.advanceTimersByTime(1001); + + expect(cache.get("client-1")).toBeUndefined(); + + jest.useRealTimers(); + }); + + it("clears all entries", () => { + const cache = new ConfigCache(60_000); + const configA = createConfig("client-a"); + const configB 
= createConfig("client-b"); + + cache.set("client-a", configA); + cache.set("client-b", configB); + + expect(cache.get("client-a")).toEqual(configA); + expect(cache.get("client-b")).toEqual(configB); + + cache.clear(); + + expect(cache.get("client-a")).toBeUndefined(); + expect(cache.get("client-b")).toBeUndefined(); + }); +}); diff --git a/lambdas/client-transform-filter-lambda/src/services/config-cache.ts b/src/config-cache/src/config-cache.ts similarity index 100% rename from lambdas/client-transform-filter-lambda/src/services/config-cache.ts rename to src/config-cache/src/config-cache.ts diff --git a/src/config-cache/src/index.ts b/src/config-cache/src/index.ts new file mode 100644 index 00000000..1da1a0f1 --- /dev/null +++ b/src/config-cache/src/index.ts @@ -0,0 +1 @@ +export { ConfigCache } from "./config-cache"; diff --git a/src/config-cache/tsconfig.json b/src/config-cache/tsconfig.json new file mode 100644 index 00000000..a50e6fc0 --- /dev/null +++ b/src/config-cache/tsconfig.json @@ -0,0 +1,14 @@ +{ + "compilerOptions": { + "isolatedModules": true, + "paths": { + "*": [ + "./src/*" + ] + } + }, + "extends": "../../tsconfig.base.json", + "include": [ + "src/**/*" + ] +} diff --git a/src/models/src/__tests__/client-config-schema.test.ts b/src/models/src/__tests__/client-config-schema.test.ts index da1e5429..d166c037 100644 --- a/src/models/src/__tests__/client-config-schema.test.ts +++ b/src/models/src/__tests__/client-config-schema.test.ts @@ -19,6 +19,8 @@ const expectFailedParse = ( return result; }; +const VALID_SPKI_HASH = "KL/yFsVH+gnkkzdQ+DSlV8xMQOMehksgT6aOqQviOu8="; + const createValidConfig = (): ClientSubscriptionConfiguration => ({ clientId: "client-1", subscriptions: [ @@ -45,6 +47,8 @@ const createValidConfig = (): ClientSubscriptionConfiguration => ({ invocationMethod: "POST", invocationRateLimit: 10, apiKey: { headerName: "x-api-key", headerValue: "secret" }, + mtls: { enabled: true }, + certPinning: { enabled: true, spkiHash: 
VALID_SPKI_HASH }, }, ], }); @@ -147,4 +151,105 @@ describe("parseClientSubscriptionConfiguration", () => { }), ]); }); + + it("parses a valid config with mtls, certPinning, and delivery fields", () => { + const config = createValidConfig(); + config.targets[0].delivery = { + maxRetryDurationSeconds: 7200, + circuitBreaker: { enabled: true }, + }; + + expect(parseClientSubscriptionConfiguration(config)).toEqual({ + success: true, + data: config, + }); + }); + + it("returns a failed parse result when mtls field is missing", () => { + const config = createValidConfig(); + const target = config.targets[0] as Record; + delete target.mtls; + + const result = expectFailedParse( + parseClientSubscriptionConfiguration(config), + ); + + expect(result.error.issues).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + path: expect.arrayContaining(["targets", 0, "mtls"]), + }), + ]), + ); + }); + + it("returns a failed parse result when spkiHash has an invalid pattern", () => { + const config = createValidConfig(); + config.targets[0].certPinning.spkiHash = "not-a-valid-hash"; + + const result = expectFailedParse( + parseClientSubscriptionConfiguration(config), + ); + + expect(result.error.issues).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + message: "Invalid SPKI hash", + }), + ]), + ); + }); + + it("returns a failed parse result when certPinning.enabled is true without spkiHash", () => { + const config = createValidConfig(); + config.targets[0].certPinning = { enabled: true }; + + const result = expectFailedParse( + parseClientSubscriptionConfiguration(config), + ); + + expect(result.error.issues).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + message: "spkiHash is required when certPinning is enabled", + }), + ]), + ); + }); + + it("returns a failed parse result when maxRetryDurationSeconds is below 60", () => { + const config = createValidConfig(); + config.targets[0].delivery = { maxRetryDurationSeconds: 59 }; + + const 
result = expectFailedParse( + parseClientSubscriptionConfiguration(config), + ); + + expect(result.success).toBe(false); + }); + + it("returns a failed parse result when maxRetryDurationSeconds is above 43200", () => { + const config = createValidConfig(); + config.targets[0].delivery = { maxRetryDurationSeconds: 43_201 }; + + const result = expectFailedParse( + parseClientSubscriptionConfiguration(config), + ); + + expect(result.success).toBe(false); + }); + + it("accepts maxRetryDurationSeconds at boundary value 60", () => { + const config = createValidConfig(); + config.targets[0].delivery = { maxRetryDurationSeconds: 60 }; + + expect(parseClientSubscriptionConfiguration(config).success).toBe(true); + }); + + it("accepts maxRetryDurationSeconds at boundary value 43200", () => { + const config = createValidConfig(); + config.targets[0].delivery = { maxRetryDurationSeconds: 43_200 }; + + expect(parseClientSubscriptionConfiguration(config).success).toBe(true); + }); }); diff --git a/src/models/src/client-config-schema.ts b/src/models/src/client-config-schema.ts index b56a9439..5da8479d 100644 --- a/src/models/src/client-config-schema.ts +++ b/src/models/src/client-config-schema.ts @@ -22,6 +22,20 @@ const httpsUrlSchema = z.string().refine( }, ); +const SPKI_HASH_PATTERN = /^[A-Za-z0-9+/]{43}=$/; + +const certPinningSchema = z + .object({ + enabled: z.boolean(), + spkiHash: z + .string() + .regex(SPKI_HASH_PATTERN, "Invalid SPKI hash") + .optional(), + }) + .refine((val) => !val.enabled || val.spkiHash !== undefined, { + message: "spkiHash is required when certPinning is enabled", + }); + const targetSchema = z.object({ targetId: z.string(), type: z.literal("API"), @@ -32,6 +46,20 @@ const targetSchema = z.object({ headerName: z.string(), headerValue: z.string(), }), + mtls: z.object({ + enabled: z.boolean(), + }), + certPinning: certPinningSchema, + delivery: z + .object({ + maxRetryDurationSeconds: z.number().min(60).max(43_200).optional(), + circuitBreaker: z + 
.object({ + enabled: z.boolean(), + }) + .optional(), + }) + .optional(), }); const baseSubscriptionSchema = z.object({ diff --git a/src/models/src/client-config.ts b/src/models/src/client-config.ts index 84116353..9b434596 100644 --- a/src/models/src/client-config.ts +++ b/src/models/src/client-config.ts @@ -15,6 +15,19 @@ export type CallbackTarget = { headerName: string; headerValue: string; }; + mtls: { + enabled: boolean; + }; + certPinning: { + enabled: boolean; + spkiHash?: string; + }; + delivery?: { + maxRetryDurationSeconds?: number; + circuitBreaker?: { + enabled: boolean; + }; + }; }; type SubscriptionConfigurationBase = { diff --git a/tests/integration/fixtures/subscriptions/mock-client-1.json b/tests/integration/fixtures/subscriptions/mock-client-1.json index 1e76ad65..ed70c73f 100644 --- a/tests/integration/fixtures/subscriptions/mock-client-1.json +++ b/tests/integration/fixtures/subscriptions/mock-client-1.json @@ -35,9 +35,15 @@ "headerName": "x-api-key", "headerValue": "REPLACED_BY_TERRAFORM" }, + "certPinning": { + "enabled": false + }, "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", "invocationMethod": "POST", "invocationRateLimit": 10, + "mtls": { + "enabled": false + }, "targetId": "target-23b2ee2f-8e81-43cd-9bb8-5ea30a09f779", "type": "API" } diff --git a/tests/integration/fixtures/subscriptions/mock-client-2.json b/tests/integration/fixtures/subscriptions/mock-client-2.json index ee7091cd..ab5460c9 100644 --- a/tests/integration/fixtures/subscriptions/mock-client-2.json +++ b/tests/integration/fixtures/subscriptions/mock-client-2.json @@ -20,9 +20,15 @@ "headerName": "x-api-key", "headerValue": "REPLACED_BY_TERRAFORM" }, + "certPinning": { + "enabled": false + }, "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", "invocationMethod": "POST", "invocationRateLimit": 10, + "mtls": { + "enabled": false + }, "targetId": "target-1f3aa57d-c0b6-4a0a-a8e9-c7f97f1e27e7", "type": "API" }, @@ -31,9 +37,15 @@ "headerName": "x-api-key", 
"headerValue": "REPLACED_BY_TERRAFORM" }, + "certPinning": { + "enabled": false + }, "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", "invocationMethod": "POST", "invocationRateLimit": 10, + "mtls": { + "enabled": false + }, "targetId": "target-c23f4ad8-2b6f-4510-b5b6-40f2b7fbbec5", "type": "API" } diff --git a/tests/integration/fixtures/subscriptions/mock-client-circuit-breaker.json b/tests/integration/fixtures/subscriptions/mock-client-circuit-breaker.json new file mode 100644 index 00000000..783681c5 --- /dev/null +++ b/tests/integration/fixtures/subscriptions/mock-client-circuit-breaker.json @@ -0,0 +1,40 @@ +{ + "clientId": "mock-client-circuit-breaker", + "subscriptions": [ + { + "messageStatuses": [ + "DELIVERED", + "FAILED" + ], + "subscriptionId": "sub-cb-msg-001", + "subscriptionType": "MessageStatus", + "targetIds": [ + "target-cb-001" + ] + } + ], + "targets": [ + { + "apiKey": { + "headerName": "x-api-key", + "headerValue": "REPLACED_BY_TERRAFORM" + }, + "certPinning": { + "enabled": false + }, + "delivery": { + "circuitBreaker": { + "enabled": true + } + }, + "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", + "invocationMethod": "POST", + "invocationRateLimit": 10, + "mtls": { + "enabled": false + }, + "targetId": "target-cb-001", + "type": "API" + } + ] +} diff --git a/tests/integration/fixtures/subscriptions/mock-client-mtls.json b/tests/integration/fixtures/subscriptions/mock-client-mtls.json new file mode 100644 index 00000000..d225612f --- /dev/null +++ b/tests/integration/fixtures/subscriptions/mock-client-mtls.json @@ -0,0 +1,36 @@ +{ + "clientId": "mock-client-mtls", + "subscriptions": [ + { + "messageStatuses": [ + "DELIVERED", + "FAILED" + ], + "subscriptionId": "sub-mtls-msg-001", + "subscriptionType": "MessageStatus", + "targetIds": [ + "target-mtls-001" + ] + } + ], + "targets": [ + { + "apiKey": { + "headerName": "x-api-key", + "headerValue": "REPLACED_BY_TERRAFORM" + }, + "certPinning": { + "enabled": true, + "spkiHash": 
"REPLACED_BY_TERRAFORM" + }, + "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", + "invocationMethod": "POST", + "invocationRateLimit": 10, + "mtls": { + "enabled": true + }, + "targetId": "target-mtls-001", + "type": "API" + } + ] +} diff --git a/tests/integration/fixtures/subscriptions/mock-client-rate-limit.json b/tests/integration/fixtures/subscriptions/mock-client-rate-limit.json new file mode 100644 index 00000000..80a40e6a --- /dev/null +++ b/tests/integration/fixtures/subscriptions/mock-client-rate-limit.json @@ -0,0 +1,35 @@ +{ + "clientId": "mock-client-rate-limit", + "subscriptions": [ + { + "messageStatuses": [ + "DELIVERED", + "FAILED" + ], + "subscriptionId": "sub-rl-msg-001", + "subscriptionType": "MessageStatus", + "targetIds": [ + "target-rl-001" + ] + } + ], + "targets": [ + { + "apiKey": { + "headerName": "x-api-key", + "headerValue": "REPLACED_BY_TERRAFORM" + }, + "certPinning": { + "enabled": false + }, + "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", + "invocationMethod": "POST", + "invocationRateLimit": 2, + "mtls": { + "enabled": false + }, + "targetId": "target-rl-001", + "type": "API" + } + ] +} diff --git a/tests/integration/helpers/event-factories.ts b/tests/integration/helpers/event-factories.ts index 015bbced..35f7f2e8 100644 --- a/tests/integration/helpers/event-factories.ts +++ b/tests/integration/helpers/event-factories.ts @@ -1,5 +1,6 @@ import type { ChannelStatusData, + ClientCallbackPayload, MessageStatusData, StatusPublishEvent, } from "@nhs-notify-client-callbacks/models"; @@ -17,6 +18,37 @@ type ChannelEventOverrides = { data?: Partial; }; +type DeliveryMessage = { + payload: ClientCallbackPayload; + subscriptions: string[]; + targetId: string; +}; + +export function createDeliveryMessage( + overrides?: Partial, +): DeliveryMessage { + const config = getMockItClientConfig(); + const targetId = + overrides?.targetId ?? config.targets[0]?.targetId ?? "target-001"; + + return { + payload: + overrides?.payload ?? 
+ ({ + data: [ + { + type: "MessageStatus", + attributes: { messageStatus: "delivered" }, + links: { message: "https://api.example.invalid/messages/msg-001" }, + meta: { idempotencyKey: crypto.randomUUID() }, + }, + ], + } as ClientCallbackPayload), + subscriptions: overrides?.subscriptions ?? ["sub-001"], + targetId, + }; +} + export function createMessageStatusPublishEvent( overrides?: MessageEventOverrides, ): StatusPublishEvent { diff --git a/tests/integration/helpers/mock-client-config.ts b/tests/integration/helpers/mock-client-config.ts index eb94974c..a004b4bc 100644 --- a/tests/integration/helpers/mock-client-config.ts +++ b/tests/integration/helpers/mock-client-config.ts @@ -20,6 +20,11 @@ export const CLIENT_FIXTURES = { apiKeyVar: "MOCK_CLIENT_2_API_KEY", applicationIdVar: "MOCK_CLIENT_2_APPLICATION_ID", }, + clientMtls: { + fixture: "mock-client-mtls.json", + apiKeyVar: "MOCK_CLIENT_MTLS_API_KEY", + applicationIdVar: "MOCK_CLIENT_MTLS_APPLICATION_ID", + }, } as const; export type ClientFixtureKey = keyof typeof CLIENT_FIXTURES; diff --git a/tools/client-subscriptions-management/package.json b/tools/client-subscriptions-management/package.json index ec4cb3b3..4d934470 100644 --- a/tools/client-subscriptions-management/package.json +++ b/tools/client-subscriptions-management/package.json @@ -29,6 +29,7 @@ "@aws-sdk/client-sts": "catalog:aws", "@aws-sdk/credential-providers": "catalog:aws", "@nhs-notify-client-callbacks/models": "workspace:*", + "picocolors": "catalog:app", "table": "catalog:app", "yargs": "catalog:app", "zod": "catalog:app" diff --git a/tools/client-subscriptions-management/src/__tests__/domain/client-subscription-builder.test.ts b/tools/client-subscriptions-management/src/__tests__/domain/client-subscription-builder.test.ts index 10fcb111..3b535fbd 100644 --- a/tools/client-subscriptions-management/src/__tests__/domain/client-subscription-builder.test.ts +++ 
b/tools/client-subscriptions-management/src/__tests__/domain/client-subscription-builder.test.ts @@ -8,6 +8,16 @@ const UUID_REGEX = /^[\da-f]{8}-[\da-f]{4}-4[\da-f]{3}-[89ab][\da-f]{3}-[\da-f]{12}$/i; describe("buildTarget", () => { + let warnSpy: jest.SpyInstance; + + beforeEach(() => { + warnSpy = jest.spyOn(console, "warn").mockImplementation(); + }); + + afterEach(() => { + warnSpy.mockRestore(); + }); + it("builds a target with required fields", () => { const result = buildTarget({ apiEndpoint: "https://example.com/webhook", @@ -22,6 +32,8 @@ describe("buildTarget", () => { invocationMethod: "POST", invocationRateLimit: 10, apiKey: { headerName: "x-api-key", headerValue: "secret" }, + mtls: { enabled: false }, + certPinning: { enabled: false }, }); expect(result.targetId).toMatch(UUID_REGEX); }); @@ -35,6 +47,73 @@ describe("buildTarget", () => { expect(result.apiKey.headerName).toBe("x-api-key"); }); + + it("emits warning when mtls is disabled", () => { + buildTarget({ + apiEndpoint: "https://example.com/webhook", + apiKey: "secret", + rateLimit: 10, + mtls: { enabled: false }, + }); + + expect(warnSpy).toHaveBeenCalledWith( + expect.stringContaining("mTLS is disabled"), + ); + }); + + it("emits warning when mtls enabled but certPinning disabled", () => { + buildTarget({ + apiEndpoint: "https://example.com/webhook", + apiKey: "secret", + rateLimit: 10, + mtls: { enabled: true }, + certPinning: { enabled: false }, + }); + + expect(warnSpy).toHaveBeenCalledWith( + expect.stringContaining("certificate pinning is disabled"), + ); + }); + + it("emits warning when certPinning enabled without spkiHash", () => { + buildTarget({ + apiEndpoint: "https://example.com/webhook", + apiKey: "secret", + rateLimit: 10, + mtls: { enabled: true }, + certPinning: { enabled: true }, + }); + + expect(warnSpy).toHaveBeenCalledWith( + expect.stringContaining("no SPKI hash is stored"), + ); + }); + + it("emits warning when certPinning enabled but mtls disabled", () => { + 
buildTarget({ + apiEndpoint: "https://example.com/webhook", + apiKey: "secret", + rateLimit: 10, + mtls: { enabled: false }, + certPinning: { enabled: true }, + }); + + expect(warnSpy).toHaveBeenCalledWith( + expect.stringContaining("mTLS is disabled"), + ); + }); + + it("emits no warnings for fully secure config", () => { + buildTarget({ + apiEndpoint: "https://example.com/webhook", + apiKey: "secret", + rateLimit: 10, + mtls: { enabled: true }, + certPinning: { enabled: true, spkiHash: "abc123" }, + }); + + expect(warnSpy).not.toHaveBeenCalled(); + }); }); describe("buildMessageStatusSubscription", () => { diff --git a/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/targets-set-certificate.test.ts b/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/targets-set-certificate.test.ts new file mode 100644 index 00000000..9deb7065 --- /dev/null +++ b/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/targets-set-certificate.test.ts @@ -0,0 +1,127 @@ +import path from "node:path"; +import { mkdtempSync, unlinkSync, writeFileSync } from "node:fs"; +import os from "node:os"; +import * as cli from "src/entrypoint/cli/targets-set-certificate"; +import { + captureCliConsoleState, + expectWrappedCliError, + getMockCreateRepository, + resetCliConsoleState, + resetMockCreateRepository, + restoreCliConsoleState, +} from "src/__tests__/entrypoint/cli/test-utils"; +import { + createClientSubscriptionConfig, + createTarget, +} from "src/__tests__/helpers/client-subscription-fixtures"; + +const mockGetClientConfig = jest.fn(); +const mockPutClientConfig = jest.fn(); +const mockFormatClientConfig = jest.fn(); + +jest.mock("src/entrypoint/cli/helper", () => ({ + ...jest.requireActual("src/entrypoint/cli/helper"), + createRepository: jest.fn(), +})); +jest.mock("src/format", () => ({ + formatClientConfig: (...args: unknown[]) => mockFormatClientConfig(...args), +})); + +const FIXTURE_CERT_PATH = path.join(__dirname, 
"../../fixtures/test-cert.pem"); +const EXPECTED_SPKI_HASH = "SpGTft7LNMxLIx5s9GMAaHTo1uz4eqMtrAFws3Exs8I="; + +const target = createTarget(); +const config = createClientSubscriptionConfig({ targets: [target] }); +const mockCreateRepository = getMockCreateRepository(); + +describe("targets-set-certificate CLI", () => { + const originalCliConsoleState = captureCliConsoleState(); + + const baseArgs = [ + "node", + "script", + "--client-id", + "client-1", + "--bucket-name", + "bucket-1", + "--target-id", + target.targetId, + ]; + + beforeEach(() => { + mockGetClientConfig.mockReset(); + mockGetClientConfig.mockResolvedValue(config); + mockPutClientConfig.mockReset(); + mockPutClientConfig.mockResolvedValue(config); + mockFormatClientConfig.mockReset(); + mockFormatClientConfig.mockReturnValue("formatted-output"); + resetMockCreateRepository({ + getClientConfig: mockGetClientConfig, + putClientConfig: mockPutClientConfig, + }); + resetCliConsoleState(); + }); + + afterAll(() => { + restoreCliConsoleState(originalCliConsoleState); + }); + + it("extracts SPKI hash from valid PEM and stores it", async () => { + await cli.main([...baseArgs, "--pem-file", FIXTURE_CERT_PATH]); + + expect(mockPutClientConfig).toHaveBeenCalledWith( + "client-1", + expect.objectContaining({ + targets: [ + expect.objectContaining({ + certPinning: expect.objectContaining({ + spkiHash: EXPECTED_SPKI_HASH, + }), + }), + ], + }), + false, + ); + }); + + it("errors for invalid PEM file", async () => { + const tmpDir = mkdtempSync(path.join(os.tmpdir(), "cert-test-")); + const invalidPath = path.join(tmpDir, "invalid.pem"); + // eslint-disable-next-line security/detect-non-literal-fs-filename + writeFileSync(invalidPath, "not-a-pem"); + + await cli.main([...baseArgs, "--pem-file", invalidPath]).catch(() => {}); + + expect(mockPutClientConfig).not.toHaveBeenCalled(); + + // eslint-disable-next-line security/detect-non-literal-fs-filename + unlinkSync(invalidPath); + }); + + it("passes dry-run to 
putClientConfig", async () => { + await cli.main([ + ...baseArgs, + "--pem-file", + FIXTURE_CERT_PATH, + "--dry-run", + "true", + ]); + + expect(mockPutClientConfig).toHaveBeenCalledWith( + "client-1", + expect.any(Object), + true, + ); + }); + + it("handles repository errors in wrapped CLI", async () => { + expect.hasAssertions(); + mockCreateRepository.mockRejectedValue(new Error("Boom")); + + await expectWrappedCliError(cli.main, [ + ...baseArgs, + "--pem-file", + FIXTURE_CERT_PATH, + ]); + }); +}); diff --git a/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/targets-set-mtls.test.ts b/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/targets-set-mtls.test.ts new file mode 100644 index 00000000..0703f9d3 --- /dev/null +++ b/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/targets-set-mtls.test.ts @@ -0,0 +1,107 @@ +import * as cli from "src/entrypoint/cli/targets-set-mtls"; +import { + captureCliConsoleState, + expectWrappedCliError, + getMockCreateRepository, + resetCliConsoleState, + resetMockCreateRepository, + restoreCliConsoleState, +} from "src/__tests__/entrypoint/cli/test-utils"; +import { + createClientSubscriptionConfig, + createTarget, +} from "src/__tests__/helpers/client-subscription-fixtures"; + +const mockGetClientConfig = jest.fn(); +const mockPutClientConfig = jest.fn(); +const mockFormatClientConfig = jest.fn(); + +jest.mock("src/entrypoint/cli/helper", () => ({ + ...jest.requireActual("src/entrypoint/cli/helper"), + createRepository: jest.fn(), +})); +jest.mock("src/format", () => ({ + formatClientConfig: (...args: unknown[]) => mockFormatClientConfig(...args), +})); + +const target = createTarget(); +const config = createClientSubscriptionConfig({ targets: [target] }); +const mockCreateRepository = getMockCreateRepository(); + +describe("targets-set-mtls CLI", () => { + const originalCliConsoleState = captureCliConsoleState(); + + const baseArgs = [ + "node", + "script", + "--client-id", + 
"client-1", + "--bucket-name", + "bucket-1", + "--target-id", + target.targetId, + ]; + + beforeEach(() => { + mockGetClientConfig.mockReset(); + mockGetClientConfig.mockResolvedValue(config); + mockPutClientConfig.mockReset(); + mockPutClientConfig.mockResolvedValue(config); + mockFormatClientConfig.mockReset(); + mockFormatClientConfig.mockReturnValue("formatted-output"); + resetMockCreateRepository({ + getClientConfig: mockGetClientConfig, + putClientConfig: mockPutClientConfig, + }); + resetCliConsoleState(); + console.warn = jest.fn(); + }); + + afterAll(() => { + restoreCliConsoleState(originalCliConsoleState); + }); + + it("enables mTLS with --enable flag", async () => { + await cli.main([...baseArgs, "--enable"]); + + expect(mockPutClientConfig).toHaveBeenCalledWith( + "client-1", + expect.objectContaining({ + targets: [expect.objectContaining({ mtls: { enabled: true } })], + }), + false, + ); + }); + + it("disables mTLS with --disable flag and emits ANSI warning", async () => { + await cli.main([...baseArgs, "--disable"]); + + expect(console.warn).toHaveBeenCalledWith( + expect.stringContaining("Disabling mTLS"), + ); + expect(mockPutClientConfig).toHaveBeenCalledWith( + "client-1", + expect.objectContaining({ + targets: [expect.objectContaining({ mtls: { enabled: false } })], + }), + false, + ); + }); + + it("passes dry-run to putClientConfig", async () => { + await cli.main([...baseArgs, "--enable", "--dry-run", "true"]); + + expect(mockPutClientConfig).toHaveBeenCalledWith( + "client-1", + expect.any(Object), + true, + ); + }); + + it("handles errors in wrapped CLI", async () => { + expect.hasAssertions(); + mockCreateRepository.mockRejectedValue(new Error("Boom")); + + await expectWrappedCliError(cli.main, [...baseArgs, "--enable"]); + }); +}); diff --git a/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/targets-set-pinning.test.ts b/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/targets-set-pinning.test.ts new 
file mode 100644 index 00000000..f2cfd9ea --- /dev/null +++ b/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/targets-set-pinning.test.ts @@ -0,0 +1,152 @@ +import * as cli from "src/entrypoint/cli/targets-set-pinning"; +import { + captureCliConsoleState, + expectWrappedCliError, + getMockCreateRepository, + resetCliConsoleState, + resetMockCreateRepository, + restoreCliConsoleState, +} from "src/__tests__/entrypoint/cli/test-utils"; +import { + createClientSubscriptionConfig, + createTarget, +} from "src/__tests__/helpers/client-subscription-fixtures"; + +const mockGetClientConfig = jest.fn(); +const mockPutClientConfig = jest.fn(); +const mockFormatClientConfig = jest.fn(); + +jest.mock("src/entrypoint/cli/helper", () => ({ + ...jest.requireActual("src/entrypoint/cli/helper"), + createRepository: jest.fn(), +})); +jest.mock("src/format", () => ({ + formatClientConfig: (...args: unknown[]) => mockFormatClientConfig(...args), +})); + +const target = createTarget({ + certPinning: { enabled: true, spkiHash: "existing-hash" }, +}); +const config = createClientSubscriptionConfig({ targets: [target] }); +const mockCreateRepository = getMockCreateRepository(); + +describe("targets-set-pinning CLI", () => { + const originalCliConsoleState = captureCliConsoleState(); + + const baseArgs = [ + "node", + "script", + "--client-id", + "client-1", + "--bucket-name", + "bucket-1", + "--target-id", + target.targetId, + ]; + + beforeEach(() => { + mockGetClientConfig.mockReset(); + mockGetClientConfig.mockResolvedValue( + createClientSubscriptionConfig({ + targets: [ + createTarget({ + certPinning: { enabled: true, spkiHash: "existing-hash" }, + }), + ], + }), + ); + mockPutClientConfig.mockReset(); + mockPutClientConfig.mockResolvedValue(config); + mockFormatClientConfig.mockReset(); + mockFormatClientConfig.mockReturnValue("formatted-output"); + resetMockCreateRepository({ + getClientConfig: mockGetClientConfig, + putClientConfig: mockPutClientConfig, + }); + 
resetCliConsoleState(); + console.warn = jest.fn(); + }); + + afterAll(() => { + restoreCliConsoleState(originalCliConsoleState); + }); + + it("enables certificate pinning with --enable flag", async () => { + await cli.main([...baseArgs, "--enable"]); + + expect(mockPutClientConfig).toHaveBeenCalledWith( + "client-1", + expect.objectContaining({ + targets: [ + expect.objectContaining({ + certPinning: { enabled: true, spkiHash: "existing-hash" }, + }), + ], + }), + false, + ); + }); + + it("disables pinning with --disable flag and emits ANSI warning", async () => { + await cli.main([...baseArgs, "--disable"]); + + expect(console.warn).toHaveBeenCalledWith( + expect.stringContaining("Disabling certificate pinning"), + ); + expect(mockPutClientConfig).toHaveBeenCalledWith( + "client-1", + expect.objectContaining({ + targets: [ + expect.objectContaining({ + certPinning: { enabled: false, spkiHash: "existing-hash" }, + }), + ], + }), + false, + ); + }); + + it("preserves existing spkiHash when disabling", async () => { + await cli.main([...baseArgs, "--disable"]); + + const putCall = mockPutClientConfig.mock.calls[0]; + const updatedTarget = putCall[1].targets[0]; + expect(updatedTarget.certPinning.spkiHash).toBe("existing-hash"); + }); + + it("passes dry-run to putClientConfig", async () => { + await cli.main([...baseArgs, "--enable", "--dry-run", "true"]); + + expect(mockPutClientConfig).toHaveBeenCalledWith( + "client-1", + expect.any(Object), + true, + ); + }); + + it("handles errors in wrapped CLI", async () => { + expect.hasAssertions(); + mockCreateRepository.mockRejectedValue(new Error("Boom")); + + await expectWrappedCliError(cli.main, [...baseArgs, "--enable"]); + }); + + it("throws when enabling pinning but target has no spkiHash", async () => { + expect.hasAssertions(); + mockGetClientConfig.mockResolvedValue( + createClientSubscriptionConfig({ + targets: [ + createTarget({ + certPinning: { enabled: false }, + }), + ], + }), + ); + + await 
expectWrappedCliError( + cli.main, + [...baseArgs, "--enable"], + `Target '${target.targetId}' has no SPKI hash stored. Run 'targets-set-certificate' first to configure a certificate hash before enabling pinning.`, + ); + }); +}); diff --git a/tools/client-subscriptions-management/src/__tests__/fixtures/test-cert.pem b/tools/client-subscriptions-management/src/__tests__/fixtures/test-cert.pem new file mode 100644 index 00000000..66accebb --- /dev/null +++ b/tools/client-subscriptions-management/src/__tests__/fixtures/test-cert.pem @@ -0,0 +1,14 @@ +-----BEGIN CERTIFICATE----- +MIICETCCAbgCCQD0bFWfktPerzAKBggqhkjOPQQDAjAXMRUwEwYDVQQDDAx0ZXN0 +LWZpeHR1cmUwHhcNMjYwNDE3MDgzMjAzWhcNMzYwNDE0MDgzMjAzWjAXMRUwEwYD +VQQDDAx0ZXN0LWZpeHR1cmUwggFLMIIBAwYHKoZIzj0CATCB9wIBATAsBgcqhkjO +PQEBAiEA/////wAAAAEAAAAAAAAAAAAAAAD///////////////8wWwQg/////wAA +AAEAAAAAAAAAAAAAAAD///////////////wEIFrGNdiqOpPns+u9VXaYhrxlHQaw +zFOw9jvOPD4n0mBLAxUAxJ02CIbnBJNqZnjhE50mt4GffpAEQQRrF9Hy4SxCR/i8 +5uVjpEDydwN9gS3rM6D0oTlF2JjClk/jQuL+Gn+bjufrSnwPnhYrzjNXazFezsu2 +QGg3v1H1AiEA/////wAAAAD//////////7zm+q2nF56E87nKwvxjJVECAQEDQgAE +WpOtSSCENuhBWnPFy4csFZkUT/t77xiQxJr/mrClSsNs4H7vwgXRc5OCT+BuTruT +J/kv6ipp/9s/c5/WP8Ln9zAKBggqhkjOPQQDAgNHADBEAiA46dblj9UZZe163Me1 +sydmzRkzLrtAy1pLCcPp86Z4WwIgRD6/Oa4UQ/C6tCDpdLquzAtRxSNKuHMPLXK9 +vkFt930= +-----END CERTIFICATE----- diff --git a/tools/client-subscriptions-management/src/__tests__/helpers/client-subscription-fixtures.ts b/tools/client-subscriptions-management/src/__tests__/helpers/client-subscription-fixtures.ts index de12586e..ee41bd09 100644 --- a/tools/client-subscriptions-management/src/__tests__/helpers/client-subscription-fixtures.ts +++ b/tools/client-subscriptions-management/src/__tests__/helpers/client-subscription-fixtures.ts @@ -24,6 +24,8 @@ export const createTarget = ( headerValue: "secret", ...overrides.apiKey, }, + mtls: { enabled: false }, + certPinning: { enabled: false }, ...overrides, }); diff --git 
a/tools/client-subscriptions-management/src/domain/client-subscription-builder.ts b/tools/client-subscriptions-management/src/domain/client-subscription-builder.ts index f91ee5a4..43edbc5a 100644 --- a/tools/client-subscriptions-management/src/domain/client-subscription-builder.ts +++ b/tools/client-subscriptions-management/src/domain/client-subscription-builder.ts @@ -7,12 +7,15 @@ import type { MessageStatusSubscriptionConfiguration, SupplierStatus, } from "@nhs-notify-client-callbacks/models"; +import pc from "picocolors"; export type BuildTargetArgs = { apiEndpoint: string; apiKey: string; apiKeyHeaderName?: string; rateLimit: number; + mtls?: { enabled: boolean }; + certPinning?: { enabled: boolean; spkiHash?: string }; }; export type BuildMessageStatusSubscriptionArgs = { @@ -30,6 +33,31 @@ export type BuildChannelStatusSubscriptionArgs = { }; export function buildTarget(args: BuildTargetArgs): CallbackTarget { + const mtls = args.mtls ?? { enabled: false }; + const certPinning = args.certPinning ?? { enabled: false }; + + const warnings: string[] = []; + + if (!mtls.enabled) { + warnings.push("mTLS is disabled — callbacks will not use mutual TLS"); + } + + if (mtls.enabled && !certPinning.enabled) { + warnings.push("mTLS is enabled but certificate pinning is disabled"); + } + + if (certPinning.enabled && !certPinning.spkiHash) { + warnings.push("Certificate pinning is enabled but no SPKI hash is stored"); + } + + if (!mtls.enabled && certPinning.enabled) { + warnings.push("Certificate pinning is enabled but mTLS is disabled"); + } + + for (const warning of warnings) { + console.warn(pc.bold(pc.red(`WARNING: ${warning}`))); + } + return { targetId: crypto.randomUUID(), type: "API", @@ -40,6 +68,8 @@ export function buildTarget(args: BuildTargetArgs): CallbackTarget { headerName: args.apiKeyHeaderName ?? 
"x-api-key", headerValue: args.apiKey, }, + mtls, + certPinning, }; } diff --git a/tools/client-subscriptions-management/src/entrypoint/cli/clients-put.ts b/tools/client-subscriptions-management/src/entrypoint/cli/clients-put.ts index c0d13554..2e5dc849 100644 --- a/tools/client-subscriptions-management/src/entrypoint/cli/clients-put.ts +++ b/tools/client-subscriptions-management/src/entrypoint/cli/clients-put.ts @@ -56,7 +56,6 @@ export const handler: CliCommand["handler"] = async (argv) => { return; } - // Safe as this is an internal tool and this CLI option we are expecting the user will run locally and manually // eslint-disable-next-line security/detect-non-literal-fs-filename const rawJson = argv.json ?? readFileSync(argv.file!, "utf8"); diff --git a/tools/client-subscriptions-management/src/entrypoint/cli/targets-set-certificate.ts b/tools/client-subscriptions-management/src/entrypoint/cli/targets-set-certificate.ts new file mode 100644 index 00000000..ea32f8b9 --- /dev/null +++ b/tools/client-subscriptions-management/src/entrypoint/cli/targets-set-certificate.ts @@ -0,0 +1,94 @@ +import { X509Certificate, createHash } from "node:crypto"; +import { readFileSync } from "node:fs"; +import type { Argv } from "yargs"; +import { + type CliCommand, + type ClientCliArgs, + type WriteCliArgs, + clientIdOption, + commonOptions, + createRepository, + runCommand, + writeOptions, +} from "src/entrypoint/cli/helper"; +import { formatClientConfig } from "src/format"; + +type TargetsSetCertificateArgs = ClientCliArgs & + WriteCliArgs & { + "target-id": string; + "pem-file": string; + }; + +export const builder = (yargs: Argv) => + yargs.options({ + ...commonOptions, + ...clientIdOption, + ...writeOptions, + "target-id": { + type: "string", + demandOption: true, + description: "Target identifier to update", + }, + "pem-file": { + type: "string", + demandOption: true, + description: "Path to PEM certificate file", + }, + }); + +function extractSpkiHash(pemPath: string): 
string { + // eslint-disable-next-line security/detect-non-literal-fs-filename -- path is provided directly by the operator via CLI arg + const pemBuffer = readFileSync(pemPath); + const x509 = new X509Certificate(pemBuffer); + const spkiDer = x509.publicKey.export({ + type: "spki", + format: "der", + }) as Buffer; + return createHash("sha256").update(spkiDer).digest("base64"); +} + +export const handler: CliCommand["handler"] = async ( + argv, +) => { + const spkiHash = extractSpkiHash(argv["pem-file"]); + console.log(`Extracted SPKI hash: ${spkiHash}`); + + const repository = await createRepository(argv); + const config = await repository.getClientConfig(argv["client-id"]); + + if (!config) { + throw new Error(`No configuration found for client: ${argv["client-id"]}`); + } + + const target = config.targets.find((t) => t.targetId === argv["target-id"]); + + if (!target) { + throw new Error( + `Target '${argv["target-id"]}' not found for client '${argv["client-id"]}'`, + ); + } + + target.certPinning = { + ...target.certPinning, + spkiHash, + }; + + const result = await repository.putClientConfig( + argv["client-id"], + config, + argv["dry-run"], + ); + console.log("Certificate SPKI hash stored successfully"); + console.log(formatClientConfig(result)); +}; + +export const command: CliCommand = { + command: "targets-set-certificate", + describe: "Extract and store SPKI hash from a PEM certificate for a target", + builder, + handler, +}; + +export async function main(args: string[] = process.argv) { + await runCommand(command, args); +} diff --git a/tools/client-subscriptions-management/src/entrypoint/cli/targets-set-mtls.ts b/tools/client-subscriptions-management/src/entrypoint/cli/targets-set-mtls.ts new file mode 100644 index 00000000..b143b3d8 --- /dev/null +++ b/tools/client-subscriptions-management/src/entrypoint/cli/targets-set-mtls.ts @@ -0,0 +1,101 @@ +import type { Argv } from "yargs"; +import pc from "picocolors"; +import { + type CliCommand, + type 
ClientCliArgs, + type WriteCliArgs, + clientIdOption, + commonOptions, + createRepository, + runCommand, + writeOptions, +} from "src/entrypoint/cli/helper"; +import { formatClientConfig } from "src/format"; + +type TargetsSetMtlsArgs = ClientCliArgs & + WriteCliArgs & { + "target-id": string; + enable?: boolean; + disable?: boolean; + }; + +export const builder = (yargs: Argv) => + yargs + .options({ + ...commonOptions, + ...clientIdOption, + ...writeOptions, + "target-id": { + type: "string", + demandOption: true, + description: "Target identifier to update", + }, + enable: { + type: "boolean", + description: "Enable mTLS for this target", + conflicts: "disable", + }, + disable: { + type: "boolean", + description: "Disable mTLS for this target", + conflicts: "enable", + }, + }) + .check((argv) => { + if (!argv.enable && !argv.disable) { + throw new Error("Specify either --enable or --disable"); + } + return true; + }); + +export const handler: CliCommand["handler"] = async ( + argv, +) => { + const enabled = argv.enable === true; + + if (!enabled) { + console.warn( + pc.bold( + pc.red("WARNING: Disabling mTLS — callbacks will not use mutual TLS"), + ), + ); + } + + const repository = await createRepository(argv); + const config = await repository.getClientConfig(argv["client-id"]); + + if (!config) { + throw new Error(`No configuration found for client: ${argv["client-id"]}`); + } + + const target = config.targets.find((t) => t.targetId === argv["target-id"]); + + if (!target) { + throw new Error( + `Target '${argv["target-id"]}' not found for client '${argv["client-id"]}'`, + ); + } + + target.mtls = { enabled }; + + const result = await repository.putClientConfig( + argv["client-id"], + config, + argv["dry-run"], + ); + console.log( + `mTLS ${enabled ? 
"enabled" : "disabled"} for target ${argv["target-id"]}`, + ); + console.log(formatClientConfig(result)); +}; + +export const command: CliCommand = { + command: "targets-set-mtls", + describe: "Enable or disable mTLS for a callback target", + builder, + handler, +}; + +export async function main(args: string[] = process.argv) { + await runCommand(command, args); +} diff --git a/tools/client-subscriptions-management/src/entrypoint/cli/targets-set-pinning.ts b/tools/client-subscriptions-management/src/entrypoint/cli/targets-set-pinning.ts new file mode 100644 index 00000000..1e6a6c85 --- /dev/null +++ b/tools/client-subscriptions-management/src/entrypoint/cli/targets-set-pinning.ts @@ -0,0 +1,106 @@ +import type { Argv } from "yargs"; +import pc from "picocolors"; +import { + type CliCommand, + type ClientCliArgs, + type WriteCliArgs, + clientIdOption, + commonOptions, + createRepository, + runCommand, + writeOptions, +} from "src/entrypoint/cli/helper"; +import { formatClientConfig } from "src/format"; + +type TargetsSetPinningArgs = ClientCliArgs & + WriteCliArgs & { + "target-id": string; + enable?: boolean; + disable?: boolean; + }; + +export const builder = (yargs: Argv) => + yargs + .options({ + ...commonOptions, + ...clientIdOption, + ...writeOptions, + "target-id": { + type: "string", + demandOption: true, + description: "Target identifier to update", + }, + enable: { + type: "boolean", + description: "Enable certificate pinning for this target", + conflicts: "disable", + }, + disable: { + type: "boolean", + description: "Disable certificate pinning for this target", + conflicts: "enable", + }, + }) + .check((argv) => { + if (!argv.enable && !argv.disable) { + throw new Error("Specify either --enable or --disable"); + } + return true; + }); + +export const handler: CliCommand["handler"] = async ( + argv, +) => { + const enabled = argv.enable === true; + + if (!enabled) { + console.warn(pc.bold(pc.red("WARNING: Disabling certificate pinning"))); + } + + const 
repository = await createRepository(argv); + const config = await repository.getClientConfig(argv["client-id"]); + + if (!config) { + throw new Error(`No configuration found for client: ${argv["client-id"]}`); + } + + const target = config.targets.find((t) => t.targetId === argv["target-id"]); + + if (!target) { + throw new Error( + `Target '${argv["target-id"]}' not found for client '${argv["client-id"]}'`, + ); + } + + if (enabled && !target.certPinning.spkiHash) { + throw new Error( + `Target '${argv["target-id"]}' has no SPKI hash stored. Run 'targets-set-certificate' first to configure a certificate hash before enabling pinning.`, + ); + } + + target.certPinning = { + ...target.certPinning, + enabled, + }; + + const result = await repository.putClientConfig( + argv["client-id"], + config, + argv["dry-run"], + ); + console.log( + `Certificate pinning ${enabled ? "enabled" : "disabled"} for target ${argv["target-id"]}`, + ); + console.log(formatClientConfig(result)); +}; + +export const command: CliCommand = { + command: "targets-set-pinning", + describe: "Enable or disable certificate pinning for a callback target", + builder, + handler, +}; + +export async function main(args: string[] = process.argv) { + await runCommand(command, args); +} From cba6b107bfd6d746818f8d65dc23947809b20db5 Mon Sep 17 00:00:00 2001 From: Rhys Cox Date: Fri, 17 Apr 2026 09:57:03 +0100 Subject: [PATCH 02/65] CCM-16073 - Fixed lints --- scripts/config/sonar-scanner.properties | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/scripts/config/sonar-scanner.properties b/scripts/config/sonar-scanner.properties index 52da5eeb..534653ca 100644 --- a/scripts/config/sonar-scanner.properties +++ b/scripts/config/sonar-scanner.properties @@ -7,3 +7,9 @@ sonar.terraform.provider.aws.version=5.54.1 sonar.cpd.exclusions=**.test.*, src/models/** sonar.coverage.exclusions=.github/**, docs/**, infrastructure/terraform/**, scripts/**, tests/test-support/**, tests/**, lambdas/**/src/__tests__/**, 
src/**/src/__tests__/**, src/models/**, tools/**/src/__tests__/**, **/jest.config.*, **/knip.ts, eslint.config.mjs sonar.javascript.lcov.reportPaths=lcov.info + +# typescript:S4325 flags non-null/type assertions as "unnecessary" based on a different +# interpretation than our own strict typechecking enforces, causing conflicting lint loops. +sonar.issue.ignore.multicriteria=e1 +sonar.issue.ignore.multicriteria.e1.ruleKey=typescript:S4325 +sonar.issue.ignore.multicriteria.e1.resourceKey=** From 11319b11ae8509d9d2faf375a496bf159d66813b Mon Sep 17 00:00:00 2001 From: Rhys Cox Date: Fri, 17 Apr 2026 10:22:26 +0100 Subject: [PATCH 03/65] CCM-16073 - Fixed terraform --- infrastructure/terraform/components/callbacks/locals.tf | 2 ++ .../callbacks/module_transform_filter_lambda.tf | 4 ++-- .../components/callbacks/s3_bucket_client_config.tf | 8 ++++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/infrastructure/terraform/components/callbacks/locals.tf b/infrastructure/terraform/components/callbacks/locals.tf index d9f06bc3..6b33f8ab 100644 --- a/infrastructure/terraform/components/callbacks/locals.tf +++ b/infrastructure/terraform/components/callbacks/locals.tf @@ -53,4 +53,6 @@ locals { } applications_map_parameter_name = coalesce(var.applications_map_parameter_name, "/${var.project}/${var.environment}/${var.component}/applications-map") + + client_config_bucket_arn = "arn:aws:s3:::${var.project}-${var.aws_account_id}-${var.region}-${var.environment}-${var.component}-subscription-config" } diff --git a/infrastructure/terraform/components/callbacks/module_transform_filter_lambda.tf b/infrastructure/terraform/components/callbacks/module_transform_filter_lambda.tf index e7e69eb1..2b75ddd5 100644 --- a/infrastructure/terraform/components/callbacks/module_transform_filter_lambda.tf +++ b/infrastructure/terraform/components/callbacks/module_transform_filter_lambda.tf @@ -69,7 +69,7 @@ data "aws_iam_policy_document" "client_transform_filter_lambda" { ] 
resources = [ - module.client_config_bucket.arn, + local.client_config_bucket_arn, ] } @@ -82,7 +82,7 @@ data "aws_iam_policy_document" "client_transform_filter_lambda" { ] resources = [ - "${module.client_config_bucket.arn}/*", + "${local.client_config_bucket_arn}/*", ] } diff --git a/infrastructure/terraform/components/callbacks/s3_bucket_client_config.tf b/infrastructure/terraform/components/callbacks/s3_bucket_client_config.tf index 8bf25c83..9943affd 100644 --- a/infrastructure/terraform/components/callbacks/s3_bucket_client_config.tf +++ b/infrastructure/terraform/components/callbacks/s3_bucket_client_config.tf @@ -55,7 +55,7 @@ data "aws_iam_policy_document" "client_config_bucket" { ] resources = [ - module.client_config_bucket.arn, + local.client_config_bucket_arn, ] } @@ -73,7 +73,7 @@ data "aws_iam_policy_document" "client_config_bucket" { ] resources = [ - "${module.client_config_bucket.arn}/*", + "${local.client_config_bucket_arn}/*", ] } @@ -91,8 +91,8 @@ data "aws_iam_policy_document" "client_config_bucket" { ] resources = [ - module.client_config_bucket.arn, - "${module.client_config_bucket.arn}/*" + local.client_config_bucket_arn, + "${local.client_config_bucket_arn}/*" ] condition { From d4b4f70460fb342e10bdcd0cf3b94d729582aa9e Mon Sep 17 00:00:00 2001 From: Rhys Cox Date: Fri, 17 Apr 2026 12:59:59 +0100 Subject: [PATCH 04/65] CCM-16073 - Fixed terraform --- .../terraform/components/callbacks/README.md | 7 +- .../callbacks/elasticache_delivery_state.tf | 6 +- .../terraform/components/callbacks/locals.tf | 1 + .../callbacks/module_client_delivery.tf | 8 +- .../callbacks/module_mock_webhook_alb_mtls.tf | 28 +-- .../callbacks/s3_bucket_mtls_test_certs.tf | 174 ++++++++++++++++++ .../components/callbacks/variables.tf | 30 --- .../components/callbacks/versions.tf | 4 + .../cloudwatch_event_rule_per_subscription.tf | 8 +- .../client-delivery/iam_role_sqs_target.tf | 1 + 10 files changed, 199 insertions(+), 68 deletions(-) create mode 100644 
infrastructure/terraform/components/callbacks/s3_bucket_mtls_test_certs.tf diff --git a/infrastructure/terraform/components/callbacks/README.md b/infrastructure/terraform/components/callbacks/README.md index 1a784db2..8ec08700 100644 --- a/infrastructure/terraform/components/callbacks/README.md +++ b/infrastructure/terraform/components/callbacks/README.md @@ -9,6 +9,7 @@ | [terraform](#requirement\_terraform) | >= 1.10.1 | | [aws](#requirement\_aws) | 6.13 | | [random](#requirement\_random) | ~> 3.0 | +| [tls](#requirement\_tls) | ~> 4.0 | ## Inputs | Name | Description | Type | Default | Required | @@ -32,11 +33,6 @@ | [log\_retention\_in\_days](#input\_log\_retention\_in\_days) | The retention period in days for the Cloudwatch Logs events to be retained, default of 0 is indefinite | `number` | `0` | no | | [message\_root\_uri](#input\_message\_root\_uri) | The root URI used for constructing message links in callback payloads | `string` | n/a | yes | | [mtls\_cert\_secret\_arn](#input\_mtls\_cert\_secret\_arn) | Secrets Manager ARN for the shared mTLS client certificate (production) | `string` | `""` | no | -| [mtls\_mock\_server\_cert\_s3\_key](#input\_mtls\_mock\_server\_cert\_s3\_key) | S3 key for the mock webhook server certificate PEM (signed by the test CA) | `string` | `""` | no | -| [mtls\_mock\_server\_key\_s3\_key](#input\_mtls\_mock\_server\_key\_s3\_key) | S3 key for the mock webhook server private key PEM | `string` | `""` | no | -| [mtls\_test\_ca\_s3\_key](#input\_mtls\_test\_ca\_s3\_key) | S3 key for the test CA certificate PEM bundle used for server verification and the mock webhook server cert chain | `string` | `""` | no | -| [mtls\_test\_cert\_s3\_key](#input\_mtls\_test\_cert\_s3\_key) | S3 key for the test mTLS client certificate bundle (dev) | `string` | `""` | no | -| [mtls\_test\_certs\_s3\_bucket](#input\_mtls\_test\_certs\_s3\_bucket) | S3 bucket containing test mTLS certificate material (dev) | `string` | `""` | no | | 
[parent\_acct\_environment](#input\_parent\_acct\_environment) | Name of the environment responsible for the acct resources used, affects things like DNS zone. Useful for named dev environments | `string` | `"main"` | no | | [pipe\_event\_patterns](#input\_pipe\_event\_patterns) | value | `list(string)` | `[]` | no | | [pipe\_log\_level](#input\_pipe\_log\_level) | Log level for the EventBridge Pipe. | `string` | `"ERROR"` | no | @@ -56,6 +52,7 @@ | [client\_transform\_filter\_lambda](#module\_client\_transform\_filter\_lambda) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-lambda.zip | n/a | | [kms](#module\_kms) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-kms.zip | n/a | | [mock\_webhook\_lambda](#module\_mock\_webhook\_lambda) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-lambda.zip | n/a | +| [mtls\_test\_certs\_bucket](#module\_mtls\_test\_certs\_bucket) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-s3bucket.zip | n/a | | [sqs\_inbound\_event](#module\_sqs\_inbound\_event) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-sqs.zip | n/a | ## Outputs diff --git a/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf b/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf index b1c140f0..e78ccd04 100644 --- a/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf +++ b/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf @@ -7,7 +7,7 @@ resource "aws_elasticache_serverless_cache" "delivery_state" { snapshot_retention_limit = 0 security_group_ids = [aws_security_group.elasticache_delivery_state.id] - subnet_ids = local.acct.private_subnet_ids + subnet_ids = try(local.acct.private_subnets[local.bc_name], []) kms_key_id = module.kms.key_arn @@ -34,7 +34,7 
@@ resource "aws_elasticache_serverless_cache" "delivery_state" { resource "aws_security_group" "elasticache_delivery_state" { name = "${local.csi}-elasticache-delivery-state" description = "Security group for ElastiCache delivery state cluster" - vpc_id = local.acct.vpc_id + vpc_id = local.acct.vpc_ids[local.bc_name] tags = merge( local.default_tags, @@ -58,7 +58,7 @@ resource "aws_vpc_security_group_ingress_rule" "elasticache_from_lambda" { resource "aws_security_group" "https_client_lambda" { name = "${local.csi}-https-client-lambda" description = "Security group for per-client HTTPS Client Lambda functions" - vpc_id = local.acct.vpc_id + vpc_id = local.acct.vpc_ids[local.bc_name] tags = merge( local.default_tags, diff --git a/infrastructure/terraform/components/callbacks/locals.tf b/infrastructure/terraform/components/callbacks/locals.tf index 6b33f8ab..514be75c 100644 --- a/infrastructure/terraform/components/callbacks/locals.tf +++ b/infrastructure/terraform/components/callbacks/locals.tf @@ -1,4 +1,5 @@ locals { + bc_name = "client-callbacks" aws_lambda_functions_dir_path = "../../../../lambdas" log_destination_arn = "arn:aws:firehose:${var.region}:${var.aws_account_id}:deliverystream/nhs-main-obs-splunk-logs-firehose" root_domain_name = "${var.environment}.${local.acct.route53_zone_names["client-callbacks"]}" # e.g. 
[main|dev|abxy0].smsnudge.[dev|nonprod|prod].nhsnotify.national.nhs.uk diff --git a/infrastructure/terraform/components/callbacks/module_client_delivery.tf b/infrastructure/terraform/components/callbacks/module_client_delivery.tf index 78c23186..ebc2e9e1 100644 --- a/infrastructure/terraform/components/callbacks/module_client_delivery.tf +++ b/infrastructure/terraform/components/callbacks/module_client_delivery.tf @@ -37,10 +37,10 @@ module "client_delivery" { elasticache_iam_username = "${var.project}-${var.environment}-${var.component}-elasticache-user" mtls_cert_secret_arn = var.mtls_cert_secret_arn - mtls_test_cert_s3_bucket = var.mtls_test_certs_s3_bucket - mtls_test_cert_s3_key = var.mtls_test_cert_s3_key # gitleaks:allow - mtls_test_ca_s3_key = var.mtls_test_ca_s3_key # gitleaks:allow + mtls_test_cert_s3_bucket = var.deploy_mock_clients ? module.mtls_test_certs_bucket[0].bucket : "" + mtls_test_cert_s3_key = local.mtls_test_cert_s3_key # gitleaks:allow + mtls_test_ca_s3_key = local.mtls_test_ca_s3_key # gitleaks:allow - vpc_subnet_ids = local.acct.private_subnet_ids + vpc_subnet_ids = try(local.acct.private_subnets[local.bc_name], []) lambda_security_group_id = aws_security_group.https_client_lambda.id } diff --git a/infrastructure/terraform/components/callbacks/module_mock_webhook_alb_mtls.tf b/infrastructure/terraform/components/callbacks/module_mock_webhook_alb_mtls.tf index c70189e9..7e7badf8 100644 --- a/infrastructure/terraform/components/callbacks/module_mock_webhook_alb_mtls.tf +++ b/infrastructure/terraform/components/callbacks/module_mock_webhook_alb_mtls.tf @@ -2,7 +2,7 @@ resource "aws_security_group" "mock_webhook_alb" { count = var.deploy_mock_clients ? 
1 : 0 name = "${local.csi}-mock-webhook-alb" description = "Security group for mock webhook ALB mTLS endpoint" - vpc_id = local.acct.vpc_id + vpc_id = local.acct.vpc_ids[local.bc_name] tags = merge( local.default_tags, @@ -42,29 +42,11 @@ resource "aws_vpc_security_group_egress_rule" "mock_webhook_alb_egress" { tags = local.default_tags } -data "aws_s3_object" "mtls_mock_server_cert" { - count = var.deploy_mock_clients ? 1 : 0 - bucket = var.mtls_test_certs_s3_bucket - key = var.mtls_mock_server_cert_s3_key -} - -data "aws_s3_object" "mtls_mock_server_key" { - count = var.deploy_mock_clients ? 1 : 0 - bucket = var.mtls_test_certs_s3_bucket - key = var.mtls_mock_server_key_s3_key -} - -data "aws_s3_object" "mtls_ca_bundle" { - count = var.deploy_mock_clients ? 1 : 0 - bucket = var.mtls_test_certs_s3_bucket - key = var.mtls_test_ca_s3_key # gitleaks:allow -} - resource "aws_acm_certificate" "mock_webhook_server" { count = var.deploy_mock_clients ? 1 : 0 - certificate_body = data.aws_s3_object.mtls_mock_server_cert[0].body - private_key = data.aws_s3_object.mtls_mock_server_key[0].body - certificate_chain = data.aws_s3_object.mtls_ca_bundle[0].body + certificate_body = tls_locally_signed_cert.mock_server[0].cert_pem + private_key = tls_private_key.mock_server[0].private_key_pem + certificate_chain = tls_self_signed_cert.test_ca[0].cert_pem tags = local.default_tags } @@ -74,7 +56,7 @@ resource "aws_lb" "mock_webhook_mtls" { internal = true load_balancer_type = "application" security_groups = [aws_security_group.mock_webhook_alb[0].id] - subnets = local.acct.private_subnet_ids + subnets = try(local.acct.private_subnets[local.bc_name], []) tags = local.default_tags } diff --git a/infrastructure/terraform/components/callbacks/s3_bucket_mtls_test_certs.tf b/infrastructure/terraform/components/callbacks/s3_bucket_mtls_test_certs.tf new file mode 100644 index 00000000..e04c8027 --- /dev/null +++ b/infrastructure/terraform/components/callbacks/s3_bucket_mtls_test_certs.tf @@ 
-0,0 +1,174 @@ +module "mtls_test_certs_bucket" { + count = var.deploy_mock_clients ? 1 : 0 + source = "https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-s3bucket.zip" + + name = "mtls-test-certs" + + aws_account_id = var.aws_account_id + component = var.component + environment = var.environment + project = var.project + region = var.region + + default_tags = merge( + local.default_tags, + { + Description = "mTLS test certificate material for non-production callback delivery" + } + ) + + kms_key_arn = module.kms.key_arn + force_destroy = var.s3_enable_force_destroy + versioning = false + object_ownership = "BucketOwnerPreferred" + bucket_key_enabled = true + + policy_documents = [ + data.aws_iam_policy_document.mtls_test_certs_bucket[0].json + ] +} + +data "aws_iam_policy_document" "mtls_test_certs_bucket" { + count = var.deploy_mock_clients ? 1 : 0 + + statement { + sid = "DenyInsecureTransport" + effect = "Deny" + + principals { + type = "*" + identifiers = ["*"] + } + + actions = [ + "s3:*", + ] + + resources = [ + "arn:aws:s3:::${var.project}-${var.aws_account_id}-${var.region}-${var.environment}-${var.component}-mtls-test-certs", + "arn:aws:s3:::${var.project}-${var.aws_account_id}-${var.region}-${var.environment}-${var.component}-mtls-test-certs/*" + ] + + condition { + test = "Bool" + variable = "aws:SecureTransport" + values = ["false"] + } + } +} + +locals { + mtls_test_certs_s3_prefix = "callbacks/mtls-test" + mtls_test_cert_s3_key = "${local.mtls_test_certs_s3_prefix}/client-bundle.pem" + mtls_test_ca_s3_key = "${local.mtls_test_certs_s3_prefix}/ca.pem" +} + +# --- TLS provider: generate test CA, client, and server certificates --- + +resource "tls_private_key" "test_ca" { + count = var.deploy_mock_clients ? 1 : 0 + algorithm = "ECDSA" + ecdsa_curve = "P256" +} + +resource "tls_self_signed_cert" "test_ca" { + count = var.deploy_mock_clients ? 
1 : 0 + private_key_pem = tls_private_key.test_ca[0].private_key_pem + is_ca_certificate = true + validity_period_hours = 87600 + + subject { + common_name = "NHS Notify Test CA" + organization = "NHS Notify" + country = "GB" + } + + allowed_uses = [ + "cert_signing", + ] +} + +resource "tls_private_key" "test_client" { + count = var.deploy_mock_clients ? 1 : 0 + algorithm = "ECDSA" + ecdsa_curve = "P256" +} + +resource "tls_cert_request" "test_client" { + count = var.deploy_mock_clients ? 1 : 0 + private_key_pem = tls_private_key.test_client[0].private_key_pem + + subject { + common_name = "NHS Notify Callbacks Test Client" + organization = "NHS Notify" + country = "GB" + } +} + +resource "tls_locally_signed_cert" "test_client" { + count = var.deploy_mock_clients ? 1 : 0 + cert_request_pem = tls_cert_request.test_client[0].cert_request_pem + ca_private_key_pem = tls_private_key.test_ca[0].private_key_pem + ca_cert_pem = tls_self_signed_cert.test_ca[0].cert_pem + validity_period_hours = 87600 + + allowed_uses = [ + "digital_signature", + "client_auth", + ] +} + +resource "tls_private_key" "mock_server" { + count = var.deploy_mock_clients ? 1 : 0 + algorithm = "ECDSA" + ecdsa_curve = "P256" +} + +resource "tls_cert_request" "mock_server" { + count = var.deploy_mock_clients ? 1 : 0 + private_key_pem = tls_private_key.mock_server[0].private_key_pem + + subject { + common_name = "NHS Notify Mock Webhook Server" + organization = "NHS Notify" + country = "GB" + } + + dns_names = ["*.eu-west-2.elb.amazonaws.com"] +} + +resource "tls_locally_signed_cert" "mock_server" { + count = var.deploy_mock_clients ? 
1 : 0 + cert_request_pem = tls_cert_request.mock_server[0].cert_request_pem + ca_private_key_pem = tls_private_key.test_ca[0].private_key_pem + ca_cert_pem = tls_self_signed_cert.test_ca[0].cert_pem + validity_period_hours = 87600 + + allowed_uses = [ + "digital_signature", + "key_encipherment", + "server_auth", + ] +} + +# --- S3 objects: Lambda reads certs from S3 at runtime --- + +resource "aws_s3_object" "mtls_test_client_bundle" { + count = var.deploy_mock_clients ? 1 : 0 + bucket = module.mtls_test_certs_bucket[0].id + key = local.mtls_test_cert_s3_key # gitleaks:allow + content = "${tls_locally_signed_cert.test_client[0].cert_pem}${tls_private_key.test_client[0].private_key_pem}" + + server_side_encryption = "aws:kms" + content_type = "application/x-pem-file" +} + +resource "aws_s3_object" "mtls_test_ca" { + count = var.deploy_mock_clients ? 1 : 0 + bucket = module.mtls_test_certs_bucket[0].id + key = local.mtls_test_ca_s3_key # gitleaks:allow + content = tls_self_signed_cert.test_ca[0].cert_pem + + server_side_encryption = "aws:kms" + content_type = "application/x-pem-file" +} diff --git a/infrastructure/terraform/components/callbacks/variables.tf b/infrastructure/terraform/components/callbacks/variables.tf index cb32e8e3..0f2fc202 100644 --- a/infrastructure/terraform/components/callbacks/variables.tf +++ b/infrastructure/terraform/components/callbacks/variables.tf @@ -184,36 +184,6 @@ variable "mtls_cert_secret_arn" { default = "" } -variable "mtls_test_certs_s3_bucket" { - type = string - description = "S3 bucket containing test mTLS certificate material (dev)" - default = "" -} - -variable "mtls_test_cert_s3_key" { - type = string - description = "S3 key for the test mTLS client certificate bundle (dev)" - default = "" -} - -variable "mtls_test_ca_s3_key" { - type = string - description = "S3 key for the test CA certificate PEM bundle used for server verification and the mock webhook server cert chain" - default = "" -} - -variable 
"mtls_mock_server_cert_s3_key" { - type = string - description = "S3 key for the mock webhook server certificate PEM (signed by the test CA)" - default = "" -} - -variable "mtls_mock_server_key_s3_key" { - type = string - description = "S3 key for the mock webhook server private key PEM" - default = "" -} - variable "elasticache_data_storage_maximum_gb" { type = number description = "Maximum data storage in GB for the ElastiCache Serverless delivery state cache" diff --git a/infrastructure/terraform/components/callbacks/versions.tf b/infrastructure/terraform/components/callbacks/versions.tf index 55552749..88481e64 100644 --- a/infrastructure/terraform/components/callbacks/versions.tf +++ b/infrastructure/terraform/components/callbacks/versions.tf @@ -8,6 +8,10 @@ terraform { source = "hashicorp/random" version = "~> 3.0" } + tls = { + source = "hashicorp/tls" + version = "~> 4.0" + } } required_version = ">= 1.10.1" diff --git a/infrastructure/terraform/modules/client-delivery/cloudwatch_event_rule_per_subscription.tf b/infrastructure/terraform/modules/client-delivery/cloudwatch_event_rule_per_subscription.tf index fa2e02e9..63da2089 100644 --- a/infrastructure/terraform/modules/client-delivery/cloudwatch_event_rule_per_subscription.tf +++ b/infrastructure/terraform/modules/client-delivery/cloudwatch_event_rule_per_subscription.tf @@ -1,7 +1,7 @@ resource "aws_cloudwatch_event_rule" "per_subscription" { for_each = var.subscriptions - name = "${local.client_prefix}-${each.key}" + name = "${local.csi}-${each.key}" description = "Client Callbacks event rule for client ${var.client_id} subscription ${each.key}" event_bus_name = var.client_bus_name @@ -11,7 +11,9 @@ resource "aws_cloudwatch_event_rule" "per_subscription" { } }) - tags = local.default_tags + tags = merge(local.default_tags, { + SubscriptionId = each.value.subscription_id + }) } resource "aws_cloudwatch_event_target" "per_subscription_target" { @@ -19,7 +21,7 @@ resource "aws_cloudwatch_event_target" 
"per_subscription_target" { rule = aws_cloudwatch_event_rule.per_subscription[each.value.subscription_id].name arn = module.sqs_delivery.sqs_queue_arn - target_id = "${local.client_prefix}-${each.value.target_id}" + target_id = "${local.csi}-${each.value.target_id}" event_bus_name = var.client_bus_name role_arn = aws_iam_role.eventbridge_sqs_target.arn diff --git a/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf b/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf index 9d2348a0..00737ea3 100644 --- a/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf +++ b/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf @@ -109,6 +109,7 @@ data "aws_iam_policy_document" "https_client_lambda" { resources = [ "arn:aws:s3:::${var.mtls_test_cert_s3_bucket}/${var.mtls_test_cert_s3_key}", + "arn:aws:s3:::${var.mtls_test_cert_s3_bucket}/${var.mtls_test_ca_s3_key}", ] } } From dba2894c632adb9e6e9255e832c01965453c57e8 Mon Sep 17 00:00:00 2001 From: Rhys Cox Date: Fri, 17 Apr 2026 14:41:04 +0100 Subject: [PATCH 05/65] CCM-16073 - Attempt to trigger a fresh build --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index e97bd341..cb2edf3e 100644 --- a/.gitignore +++ b/.gitignore @@ -12,7 +12,7 @@ version.json # Please, add your custom content below! 
-# dependencies +# Dependencies node_modules .node-version */node_modules From 3cd6e4d7306bb887dec20221aca8ca97e2ee6341 Mon Sep 17 00:00:00 2001 From: Rhys Cox Date: Fri, 17 Apr 2026 15:29:26 +0100 Subject: [PATCH 06/65] CCM-16073 - Fixed terraform --- .../client-delivery/iam_role_sqs_target.tf | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf b/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf index 00737ea3..8678daf3 100644 --- a/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf +++ b/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf @@ -81,6 +81,24 @@ data "aws_iam_policy_document" "https_client_lambda" { ] } + dynamic "statement" { + for_each = var.lambda_security_group_id != "" ? [1] : [] + content { + sid = "VPCNetworkInterfacePermissions" + effect = "Allow" + + actions = [ + "ec2:CreateNetworkInterface", + "ec2:DeleteNetworkInterface", + "ec2:DescribeNetworkInterfaces", + ] + + resources = [ + "*", + ] + } + } + dynamic "statement" { for_each = var.mtls_cert_secret_arn != "" ? 
[1] : [] content { From 6810e97e24eb8edcb767cae8d9f6dac64d8953d1 Mon Sep 17 00:00:00 2001 From: Tim Marston Date: Fri, 17 Apr 2026 14:54:55 +0100 Subject: [PATCH 07/65] updated gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index cb2edf3e..d1b1e760 100644 --- a/.gitignore +++ b/.gitignore @@ -22,3 +22,4 @@ node_modules dist .DS_Store .reports +*~ From e46720c53667c6dea297c331be07993929e48f9a Mon Sep 17 00:00:00 2001 From: Tim Marston Date: Fri, 17 Apr 2026 16:00:54 +0100 Subject: [PATCH 08/65] updated vale acceptable words --- .../config/vale/styles/config/vocabularies/words/accept.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/scripts/config/vale/styles/config/vocabularies/words/accept.txt b/scripts/config/vale/styles/config/vocabularies/words/accept.txt index ee5b597c..ed5cb1a6 100644 --- a/scripts/config/vale/styles/config/vocabularies/words/accept.txt +++ b/scripts/config/vale/styles/config/vocabularies/words/accept.txt @@ -1,4 +1,5 @@ ajv +APIs asdf auditability Bitwarden @@ -22,6 +23,7 @@ Grype idempotence Jira namespace +NFRs npm OAuth Octokit @@ -39,3 +41,4 @@ teardown Terraform toolchain Trufflehog +validators From 0765ef1767257c112076955a67169f540fd358f6 Mon Sep 17 00:00:00 2001 From: Tim Marston Date: Fri, 17 Apr 2026 16:01:31 +0100 Subject: [PATCH 09/65] updated docs for npm->pnpm changeover --- AGENTS.md | 18 +++++++++--------- README.md | 2 +- docs/Makefile | 4 ++-- docs/test-standards.md | 4 ++-- knip.ts | 2 +- tests/performance/README.md | 2 +- .../client-subscriptions-management/README.md | 14 +++++++------- 7 files changed, 23 insertions(+), 23 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 982ca631..0ef373b1 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -23,25 +23,25 @@ Agents should look for a nested `AGENTS.md` in or near these areas before making ## Root package.json – role and usage -The root `package.json` is the orchestration manifestgit co for this repo. 
It does not ship application code; it wires up shared dev tooling and delegates to workspace-level projects. +The root `package.json` is the orchestration manifest for this repo. It does not ship application code; it wires up shared dev tooling and delegates to workspace-level projects. -- Workspaces: Declares the set of npm workspaces (e.g. under `lambdas/`, `utils/`, `tests/`, `scripts/`). Agents should add a new workspace path here when introducing a new npm project. -- Scripts: Provides top-level commands that fan out across workspaces using `--workspaces` (lint, typecheck, unit tests) and project-specific runners (e.g. `lambda-build`). +- Workspaces: Declares the set of pnpm workspaces (e.g. under `lambdas/`, `utils/`, `tests/`, `scripts/`). Agents should add a new workspace path here when introducing a new pnpm project. +- Scripts: Provides top-level commands that fan out across workspaces using `pnpm -r` (lint, typecheck, unit tests) and project-specific runners (e.g. `lambda-build`). - Dev tool dependencies: Centralises Jest, TypeScript, ESLint configurations and plugins to keep versions consistent across workspaces. Workspace projects should rely on these unless a local override is strictly needed. - Overrides/resolutions: Pins transitive dependencies (e.g. Jest/react-is) to avoid ecosystem conflicts. Agents must not remove overrides without verifying tests across all workspaces. Agent guidance: -- Before adding or removing a workspace, update the root `workspaces` array and ensure CI scripts still succeed with `npm run lint`, `npm run typecheck`, and `npm run test:unit` at the repo root. -- When adding repo-wide scripts, keep names consistent with existing patterns (e.g. `lint`, `lint:fix`, `typecheck`, `test:unit`, `lambda-build`) and prefer `--workspaces` fan-out. 
+- Before adding or removing a workspace, update the root `workspaces` array and ensure CI scripts still succeed with `pnpm run lint`, `pnpm run typecheck`, and `pnpm run test:unit` at the repo root. +- When adding repo-wide scripts, keep names consistent with existing patterns (e.g. `lint`, `lint:fix`, `typecheck`, `test:unit`, `lambda-build`) and prefer `pnpm -r` fan-out. - Do not publish from the root. If adding a new workspace intended for publication, mark that workspace package as `private: false` and keep the root as private. - Validate changes by running the repo pre-commit hooks: `make githooks-run`. Success criteria for changes affecting the root `package.json`: -- `npm run lint`, `npm run typecheck`, and `npm run test:unit` pass at the repo root. -- Workspace discovery is correct (new projects appear under `npm run typecheck --workspaces`). -- No regression in lambda build tooling (`npm run lambda-build`). +- `pnpm run lint`, `pnpm run typecheck`, and `pnpm run test:unit` pass at the repo root. +- Workspace discovery is correct (new projects appear under `pnpm run typecheck -r`). +- No regression in lambda build tooling (`pnpm run lambda-build`). ## What Agents Can / Can’t Do @@ -81,7 +81,7 @@ When proposing a change, agents should: to catch formatting and basic lint issues. Domain specific checks will be defined in appropriate nested AGENTS.md files. -- Suggest at least one extra validation step (for example `npm test:unit` in a lambda, or triggering a specific workflow). +- Suggest at least one extra validation step (for example `pnpm run test:unit` in a lambda, or triggering a specific workflow). - Any required follow up activites which fall outside of the current task's scope should be clearly marked with a 'TODO: CCM-12345' comment. The human user should be prompted to create and provide a JIRA ticket ID to be added to the comment. 
## Security & Safety diff --git a/README.md b/README.md index 08fda19f..44026da2 100644 --- a/README.md +++ b/README.md @@ -102,7 +102,7 @@ make config Run unit tests for Lambda functions: ```shell -npm test +pnpm test:unit ``` ## Infrastructure diff --git a/docs/Makefile b/docs/Makefile index ea4bc005..a7854d6a 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -20,10 +20,10 @@ $(if $(BASE_URL),-- --baseurl $(BASE_URL),-- --baseurl "") endef build: version - npm run build $(baseurlparam) + pnpm run build $(baseurlparam) debug: version - npm run debug + pnpm run debug version: touch _config.version.yml diff --git a/docs/test-standards.md b/docs/test-standards.md index c8ab86cf..4fef9318 100644 --- a/docs/test-standards.md +++ b/docs/test-standards.md @@ -104,7 +104,7 @@ AI must: - Verify mock return types match the actual function return types. 7. **The "Test Execution" Mandate**: - - After creating or modifying a test, you MUST run it using the repo's test command - e.g. npm run test:unit + - After creating or modifying a test, you MUST run it using the repo's test command - e.g. pnpm run test:unit - If the test fails due to incorrect imports, paths, or signatures, fix and re-run. - Only report completion when the test passes (exit code 0) and test coverage checks also pass. - See section 6.2 for the full self-correction loop requirements. @@ -192,7 +192,7 @@ AI must: When AI changes tests, it must: -- run all the tests in the npm workspace. +- run all the tests in the pnpm workspace. - report exactly what it ran and whether it passed. 
### 6.2 AI Self-Correction Loop diff --git a/knip.ts b/knip.ts index 44eb1e8d..e26f45e9 100644 --- a/knip.ts +++ b/knip.ts @@ -18,7 +18,7 @@ const config: KnipConfig = { // ESLint peer deps – referenced indirectly through plugin configs "@stylistic/eslint-plugin", "@typescript-eslint/parser", - // Used in lambdas' lambda-build npm script via pnpm exec + // Used in lambdas' lambda-build script via pnpm exec "esbuild", // Used in scripts/tests/unit.sh (shell script, not scanned by knip) "lcov-result-merger", diff --git a/tests/performance/README.md b/tests/performance/README.md index 9a44eccf..552dda01 100644 --- a/tests/performance/README.md +++ b/tests/performance/README.md @@ -22,7 +22,7 @@ Load tests for the client-callbacks service. These tests run against a real depl From the repository root: ```bash -ENVIRONMENT=dev AWS_ACCOUNT_ID=123456789012 npm run test:performance --workspace tests/performance +ENVIRONMENT=dev AWS_ACCOUNT_ID=123456789012 pnpm run test:performance --filter tests/performance ``` ## What the Tests Do diff --git a/tools/client-subscriptions-management/README.md b/tools/client-subscriptions-management/README.md index 7593bb07..9bef106b 100644 --- a/tools/client-subscriptions-management/README.md +++ b/tools/client-subscriptions-management/README.md @@ -7,7 +7,7 @@ TypeScript CLI utility for managing NHS Notify client subscription configuration From the repository root run: ```bash -npm --workspace tools/client-subscriptions-management run -- [options] +pnpm --filter client-subscriptions-management run -- [options] ``` ## Example @@ -15,7 +15,7 @@ npm --workspace tools/client-subscriptions-management run -- [options] Deploy a message status subscription to the `dev` environment using a named AWS profile: ```bash -npm --workspace tools/client-subscriptions-management run deploy -- message \ +pnpm --filter client-subscriptions-management run deploy -- message \ --environment dev \ --profile my-profile \ --client-id my-client \ @@ -33,7 +33,7 @@ 
npm --workspace tools/client-subscriptions-management run deploy -- message \ #### Message status ```bash -npm --workspace tools/client-subscriptions-management run deploy -- message \ +pnpm --filter client-subscriptions-management run deploy -- message \ --environment dev \ --client-id client-123 \ --message-statuses DELIVERED FAILED \ @@ -47,7 +47,7 @@ npm --workspace tools/client-subscriptions-management run deploy -- message \ #### Channel status ```bash -npm --workspace tools/client-subscriptions-management run deploy -- channel \ +pnpm --filter client-subscriptions-management run deploy -- channel \ --environment dev \ --client-id client-123 \ --channel-type EMAIL \ @@ -67,7 +67,7 @@ Optional for both: `--client-name "Test Client"` (defaults to client-id if not p ### Get Client Subscriptions By Client ID ```bash -npm --workspace tools/client-subscriptions-management run get-by-client-id -- \ +pnpm --filter client-subscriptions-management run get-by-client-id -- \ --environment dev \ --client-id client-123 ``` @@ -75,7 +75,7 @@ npm --workspace tools/client-subscriptions-management run get-by-client-id -- \ ### Put Message Status Subscription (S3 upload only) ```bash -npm --workspace tools/client-subscriptions-management run put-message-status -- \ +pnpm --filter client-subscriptions-management run put-message-status -- \ --environment dev \ --client-id client-123 \ --message-statuses DELIVERED FAILED \ @@ -91,7 +91,7 @@ Optional: `--client-name "Test Client"` (defaults to client-id if not provided), ### Put Channel Status Subscription (S3 upload only) ```bash -npm --workspace tools/client-subscriptions-management run put-channel-status -- \ +pnpm --filter client-subscriptions-management run put-channel-status -- \ --environment dev \ --client-id client-123 \ --channel-type EMAIL \ From 5b76a87e7e2235c0de8fb438405f5f3eaf6af76c Mon Sep 17 00:00:00 2001 From: Rhys Cox Date: Mon, 20 Apr 2026 09:28:43 +0100 Subject: [PATCH 10/65] CCM-16073 - PR feedback --- 
.luarc.json | 9 + .../terraform/components/callbacks/README.md | 2 +- .../callbacks/elasticache_delivery_state.tf | 48 +++++- .../terraform/components/callbacks/locals.tf | 2 +- .../components/callbacks/variables.tf | 6 +- .../modules/client-delivery/README.md | 10 +- .../modules/client-delivery/outputs.tf | 34 ---- knip.ts | 4 +- .../package.json | 2 +- .../helpers/client-subscription-fixtures.ts | 2 - .../__tests__/services/config-cache.test.ts | 66 -------- .../services/config-loader-service.test.ts | 16 +- .../__tests__/services/config-loader.test.ts | 21 ++- .../services/config-update.component.test.ts | 18 +- .../src/services/config-loader-service.ts | 59 ++++--- .../src/services/config-loader.ts | 75 +-------- lambdas/https-client-lambda/package.json | 2 +- .../src/__tests__/config-loader.test.ts | 16 +- .../src/__tests__/delivery-metrics.test.ts | 66 +++++++- .../__tests__/delivery-observability.test.ts | 93 ++++++++++- .../src/__tests__/endpoint-gate.test.ts | 2 +- .../__tests__/fixtures/handler-fixtures.ts | 5 +- .../src/__tests__/handler.test.ts | 50 +++--- .../src/__tests__/https-client.test.ts | 42 ++--- .../src/__tests__/retry-policy.test.ts | 10 +- .../__tests__/ssm-applications-map.test.ts | 2 +- .../src/__tests__/tls-agent-factory.test.ts | 67 +++++--- lambdas/https-client-lambda/src/handler.ts | 35 ++-- .../src/services/config-loader.ts | 72 +++----- .../src/services/delivery-metrics.ts | 28 ++++ .../src/services/delivery-observability.ts | 39 ++++- .../src/services/delivery/https-client.ts | 27 ++- .../src/services/delivery/retry-policy.ts | 4 +- .../services/delivery/tls-agent-factory.ts | 24 +-- .../src/services/endpoint-gate.ts | 92 +++++----- .../src/services/logger.ts | 1 - .../src/services/ssm-applications-map.ts | 11 +- lambdas/mock-webhook-lambda/src/index.ts | 3 + pnpm-lock.yaml | 42 ++++- src/config-subscription-cache/jest.config.ts | 14 ++ src/config-subscription-cache/package.json | 34 ++++ .../config-subscription-cache.test.ts | 157 
++++++++++++++++++ .../src/config-subscription-cache.ts | 110 ++++++++++++ src/config-subscription-cache/src/index.ts | 2 + src/config-subscription-cache/tsconfig.json | 14 ++ .../__tests__/client-config-schema.test.ts | 25 ++- src/models/src/client-config-schema.ts | 10 +- src/models/src/client-config.ts | 14 +- .../client-subscription-builder.test.ts | 33 ++-- .../cli/targets-set-certificate.test.ts | 8 +- .../entrypoint/cli/targets-set-mtls.test.ts | 20 ++- .../cli/targets-set-pinning.test.ts | 43 ++++- .../helpers/client-subscription-fixtures.ts | 2 - .../src/domain/client-subscription-builder.ts | 12 +- .../src/entrypoint/cli/clients-put.ts | 2 +- .../src/entrypoint/cli/helper.ts | 39 +++++ .../src/entrypoint/cli/targets-del.ts | 7 +- .../entrypoint/cli/targets-set-certificate.ts | 42 ++--- .../src/entrypoint/cli/targets-set-mtls.ts | 74 ++++----- .../src/entrypoint/cli/targets-set-pinning.ts | 82 ++++----- 60 files changed, 1190 insertions(+), 661 deletions(-) create mode 100644 .luarc.json delete mode 100644 infrastructure/terraform/modules/client-delivery/outputs.tf delete mode 100644 lambdas/client-transform-filter-lambda/src/__tests__/services/config-cache.test.ts delete mode 100644 lambdas/https-client-lambda/src/services/logger.ts create mode 100644 src/config-subscription-cache/jest.config.ts create mode 100644 src/config-subscription-cache/package.json create mode 100644 src/config-subscription-cache/src/__tests__/config-subscription-cache.test.ts create mode 100644 src/config-subscription-cache/src/config-subscription-cache.ts create mode 100644 src/config-subscription-cache/src/index.ts create mode 100644 src/config-subscription-cache/tsconfig.json diff --git a/.luarc.json b/.luarc.json new file mode 100644 index 00000000..25564465 --- /dev/null +++ b/.luarc.json @@ -0,0 +1,9 @@ +{ + "diagnostics": { + "globals": [ + "KEYS", + "ARGV", + "redis" + ] + } +} diff --git a/infrastructure/terraform/components/callbacks/README.md 
b/infrastructure/terraform/components/callbacks/README.md index 8ec08700..e8417f83 100644 --- a/infrastructure/terraform/components/callbacks/README.md +++ b/infrastructure/terraform/components/callbacks/README.md @@ -19,7 +19,7 @@ | [component](#input\_component) | The variable encapsulating the name of this component | `string` | `"callbacks"` | no | | [default\_tags](#input\_default\_tags) | A map of default tags to apply to all taggable resources within the component | `map(string)` | `{}` | no | | [deploy\_mock\_clients](#input\_deploy\_mock\_clients) | Flag to deploy mock webhook lambda for integration testing (test/dev environments only) | `bool` | `false` | no | -| [elasticache\_data\_storage\_maximum\_gb](#input\_elasticache\_data\_storage\_maximum\_gb) | Maximum data storage in GB for the ElastiCache Serverless delivery state cache | `number` | `1` | no | +| [elasticache\_data\_storage\_maximum\_mb](#input\_elasticache\_data\_storage\_maximum\_mb) | Maximum data storage in MB for the ElastiCache Serverless delivery state cache | `number` | `1024` | no | | [enable\_event\_anomaly\_detection](#input\_enable\_event\_anomaly\_detection) | Enable CloudWatch anomaly detection alarm for inbound event queue message reception | `bool` | `true` | no | | [enable\_xray\_tracing](#input\_enable\_xray\_tracing) | Enable AWS X-Ray active tracing for Lambda functions | `bool` | `false` | no | | [environment](#input\_environment) | The name of the tfscaffold environment | `string` | n/a | yes | diff --git a/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf b/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf index e78ccd04..3e90f293 100644 --- a/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf +++ b/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf @@ -13,8 +13,8 @@ resource "aws_elasticache_serverless_cache" "delivery_state" { cache_usage_limits { data_storage { - 
maximum = var.elasticache_data_storage_maximum_gb - unit = "GB" + maximum = var.elasticache_data_storage_maximum_mb + unit = "MB" } ecpu_per_second { @@ -90,6 +90,50 @@ resource "aws_vpc_security_group_egress_rule" "lambda_to_https" { tags = local.default_tags } +resource "aws_cloudwatch_metric_alarm" "elasticache_storage_utilisation" { + alarm_name = "${local.csi}-elasticache-storage-utilisation" + alarm_description = join(" ", [ + "CAPACITY: ElastiCache data storage utilisation exceeds 80%.", + "Review stored data or increase elasticache_data_storage_maximum_mb.", + ]) + + comparison_operator = "GreaterThanThreshold" + evaluation_periods = 2 + threshold = var.elasticache_data_storage_maximum_mb * 0.8 + actions_enabled = true + treat_missing_data = "notBreaching" + + metric_query { + id = "storage_used" + return_data = false + + metric { + metric_name = "BytesUsedForCache" + namespace = "AWS/ElastiCache" + period = 300 + stat = "Maximum" + + dimensions = { + CacheClusterId = aws_elasticache_serverless_cache.delivery_state.name + } + } + } + + metric_query { + id = "storage_used_mb" + expression = "storage_used / 1048576" + label = "Storage Used (MB)" + return_data = true + } + + tags = merge( + local.default_tags, + { + Name = "${local.csi}-elasticache-storage-utilisation" + }, + ) +} + resource "aws_cloudwatch_metric_alarm" "elasticache_ecpu_utilisation" { alarm_name = "${local.csi}-elasticache-ecpu-utilisation" alarm_description = join(" ", [ diff --git a/infrastructure/terraform/components/callbacks/locals.tf b/infrastructure/terraform/components/callbacks/locals.tf index 514be75c..64bd622c 100644 --- a/infrastructure/terraform/components/callbacks/locals.tf +++ b/infrastructure/terraform/components/callbacks/locals.tf @@ -21,7 +21,7 @@ locals { targets = [ for target in try(client.targets, []) : merge(target, { - invocationEndpoint = try(target.mtls.enabled, false) ? 
"https://${aws_lb.mock_webhook_mtls[0].dns_name}/${target.targetId}" : "http://${aws_lb.mock_webhook_mtls[0].dns_name}/${target.targetId}" + invocationEndpoint = try(target.delivery.mtls.enabled, false) ? "https://${aws_lb.mock_webhook_mtls[0].dns_name}/${target.targetId}" : "http://${aws_lb.mock_webhook_mtls[0].dns_name}/${target.targetId}" apiKey = merge(target.apiKey, { headerValue = random_password.mock_webhook_api_key[0].result }) }) ] diff --git a/infrastructure/terraform/components/callbacks/variables.tf b/infrastructure/terraform/components/callbacks/variables.tf index 0f2fc202..0e3ccd7f 100644 --- a/infrastructure/terraform/components/callbacks/variables.tf +++ b/infrastructure/terraform/components/callbacks/variables.tf @@ -184,8 +184,8 @@ variable "mtls_cert_secret_arn" { default = "" } -variable "elasticache_data_storage_maximum_gb" { +variable "elasticache_data_storage_maximum_mb" { type = number - description = "Maximum data storage in GB for the ElastiCache Serverless delivery state cache" - default = 1 + description = "Maximum data storage in MB for the ElastiCache Serverless delivery state cache" + default = 1024 } diff --git a/infrastructure/terraform/modules/client-delivery/README.md b/infrastructure/terraform/modules/client-delivery/README.md index 12dc05cc..0552f282 100644 --- a/infrastructure/terraform/modules/client-delivery/README.md +++ b/infrastructure/terraform/modules/client-delivery/README.md @@ -55,15 +55,7 @@ No requirements. 
| [sqs\_delivery](#module\_sqs\_delivery) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.9/terraform-sqs.zip | n/a | ## Outputs -| Name | Description | -|------|-------------| -| [delivery\_queue\_arn](#output\_delivery\_queue\_arn) | ARN of the per-client delivery SQS queue | -| [delivery\_queue\_url](#output\_delivery\_queue\_url) | URL of the per-client delivery SQS queue | -| [dlq\_arn](#output\_dlq\_arn) | ARN of the per-client delivery DLQ | -| [dlq\_url](#output\_dlq\_url) | URL of the per-client delivery DLQ | -| [lambda\_execution\_role\_arn](#output\_lambda\_execution\_role\_arn) | ARN of the Lambda execution IAM role | -| [lambda\_function\_arn](#output\_lambda\_function\_arn) | ARN of the per-client HTTPS Client Lambda function | -| [lambda\_function\_name](#output\_lambda\_function\_name) | Name of the per-client HTTPS Client Lambda function | +No outputs. diff --git a/infrastructure/terraform/modules/client-delivery/outputs.tf b/infrastructure/terraform/modules/client-delivery/outputs.tf deleted file mode 100644 index 727ae19d..00000000 --- a/infrastructure/terraform/modules/client-delivery/outputs.tf +++ /dev/null @@ -1,34 +0,0 @@ -output "delivery_queue_arn" { - description = "ARN of the per-client delivery SQS queue" - value = module.sqs_delivery.sqs_queue_arn -} - -output "delivery_queue_url" { - description = "URL of the per-client delivery SQS queue" - value = module.sqs_delivery.sqs_queue_url -} - -output "dlq_arn" { - description = "ARN of the per-client delivery DLQ" - value = module.dlq_delivery.sqs_queue_arn -} - -output "dlq_url" { - description = "URL of the per-client delivery DLQ" - value = module.dlq_delivery.sqs_queue_url -} - -output "lambda_function_name" { - description = "Name of the per-client HTTPS Client Lambda function" - value = module.https_client_lambda.function_name -} - -output "lambda_function_arn" { - description = "ARN of the per-client HTTPS Client Lambda function" - value = 
module.https_client_lambda.function_arn -} - -output "lambda_execution_role_arn" { - description = "ARN of the Lambda execution IAM role" - value = module.https_client_lambda.iam_role_arn -} diff --git a/knip.ts b/knip.ts index e26f45e9..fdb7d0f7 100644 --- a/knip.ts +++ b/knip.ts @@ -34,7 +34,6 @@ const config: KnipConfig = { }, "lambdas/https-client-lambda": { ignoreDependencies: ["@tsconfig/node22"], - entry: ["src/__tests__/**/*.ts"], }, "lambdas/mock-webhook-lambda": { ignoreDependencies: ["@tsconfig/node22"], @@ -42,6 +41,9 @@ const config: KnipConfig = { "src/config-cache": { ignoreDependencies: ["@tsconfig/node22"], }, + "src/config-subscription-cache": { + ignoreDependencies: ["@tsconfig/node22"], + }, "src/logger": { ignoreDependencies: ["@tsconfig/node22"], }, diff --git a/lambdas/client-transform-filter-lambda/package.json b/lambdas/client-transform-filter-lambda/package.json index d9c81d27..668250b4 100644 --- a/lambdas/client-transform-filter-lambda/package.json +++ b/lambdas/client-transform-filter-lambda/package.json @@ -1,7 +1,7 @@ { "dependencies": { "@aws-sdk/client-s3": "catalog:aws", - "@nhs-notify-client-callbacks/config-cache": "workspace:*", + "@nhs-notify-client-callbacks/config-subscription-cache": "workspace:*", "@nhs-notify-client-callbacks/logger": "workspace:*", "@nhs-notify-client-callbacks/models": "workspace:*", "aws-embedded-metrics": "catalog:app", diff --git a/lambdas/client-transform-filter-lambda/src/__tests__/helpers/client-subscription-fixtures.ts b/lambdas/client-transform-filter-lambda/src/__tests__/helpers/client-subscription-fixtures.ts index 7713813e..9491292c 100644 --- a/lambdas/client-transform-filter-lambda/src/__tests__/helpers/client-subscription-fixtures.ts +++ b/lambdas/client-transform-filter-lambda/src/__tests__/helpers/client-subscription-fixtures.ts @@ -28,8 +28,6 @@ export const createTarget = ( headerValue: "secret", ...overrides.apiKey, }, - mtls: { enabled: false }, - certPinning: { enabled: false }, 
...overrides, }); diff --git a/lambdas/client-transform-filter-lambda/src/__tests__/services/config-cache.test.ts b/lambdas/client-transform-filter-lambda/src/__tests__/services/config-cache.test.ts deleted file mode 100644 index e86ef69f..00000000 --- a/lambdas/client-transform-filter-lambda/src/__tests__/services/config-cache.test.ts +++ /dev/null @@ -1,66 +0,0 @@ -import type { ClientSubscriptionConfiguration } from "@nhs-notify-client-callbacks/models"; -import { - createClientSubscriptionConfig, - createMessageStatusSubscription, -} from "__tests__/helpers/client-subscription-fixtures"; -import { ConfigCache } from "@nhs-notify-client-callbacks/config-cache"; - -const createConfig = (): ClientSubscriptionConfiguration => - createClientSubscriptionConfig("client-1", { - subscriptions: [ - createMessageStatusSubscription(["DELIVERED"], { targetIds: [] }), - ], - }); - -describe("ConfigCache", () => { - it("stores and retrieves configuration", () => { - const cache = new ConfigCache(60_000); - const config = createConfig(); - - cache.set("client-1", config); - - expect(cache.get("client-1")).toEqual(config); - }); - - it("returns undefined for non-existent key", () => { - const cache = new ConfigCache(60_000); - const result = cache.get("non-existent"); - - expect(result).toBeUndefined(); - }); - - it("returns undefined for expired entries", () => { - jest.useFakeTimers(); - jest.setSystemTime(new Date("2025-01-01T10:00:00Z")); - - const cache = new ConfigCache(1000); // 1 second TTL - const config = createConfig(); - - cache.set("client-1", config); - expect(cache.get("client-1")).toEqual(config); - - jest.advanceTimersByTime(1001); - - const result = cache.get("client-1"); - - expect(result).toBeUndefined(); - - jest.useRealTimers(); - }); - - it("clears all entries", () => { - const cache = new ConfigCache(60_000); - const config = createConfig(); - - cache.set("client-1", config); - cache.set("client-2", config); - - 
expect(cache.get("client-1")).toEqual(config); - expect(cache.get("client-2")).toEqual(config); - - cache.clear(); - - expect(cache.get("client-1")).toBeUndefined(); - expect(cache.get("client-2")).toBeUndefined(); - }); -}); diff --git a/lambdas/client-transform-filter-lambda/src/__tests__/services/config-loader-service.test.ts b/lambdas/client-transform-filter-lambda/src/__tests__/services/config-loader-service.test.ts index a5741d2b..c907bb3f 100644 --- a/lambdas/client-transform-filter-lambda/src/__tests__/services/config-loader-service.test.ts +++ b/lambdas/client-transform-filter-lambda/src/__tests__/services/config-loader-service.test.ts @@ -1,4 +1,5 @@ import { S3Client } from "@aws-sdk/client-s3"; +import { ConfigSubscriptionCache } from "@nhs-notify-client-callbacks/config-subscription-cache"; import { ConfigLoader } from "services/config-loader"; import { ConfigLoaderService, @@ -8,6 +9,7 @@ import { const mockS3Client = jest.mocked(S3Client); const mockConfigLoader = jest.mocked(ConfigLoader); +const mockConfigSubscriptionCache = jest.mocked(ConfigSubscriptionCache); jest.mock("@aws-sdk/client-s3", () => ({ S3Client: jest.fn(), @@ -17,12 +19,19 @@ jest.mock("services/config-loader", () => ({ ConfigLoader: jest.fn(), })); +jest.mock("@nhs-notify-client-callbacks/config-subscription-cache", () => ({ + ConfigSubscriptionCache: jest.fn().mockImplementation(() => ({ + reset: jest.fn(), + })), +})); + describe("ConfigLoaderService", () => { const originalBucket = process.env.CLIENT_SUBSCRIPTION_CONFIG_BUCKET; const originalPrefix = process.env.CLIENT_SUBSCRIPTION_CONFIG_PREFIX; beforeEach(() => { mockConfigLoader.mockClear(); + mockConfigSubscriptionCache.mockClear(); process.env.CLIENT_SUBSCRIPTION_CONFIG_BUCKET = "test-bucket"; }); @@ -60,7 +69,7 @@ describe("ConfigLoaderService", () => { delete process.env.CLIENT_SUBSCRIPTION_CONFIG_PREFIX; const service = new ConfigLoaderService(); service.getLoader(); - expect(mockConfigLoader).toHaveBeenCalledWith( + 
expect(mockConfigSubscriptionCache).toHaveBeenCalledWith( expect.objectContaining({ keyPrefix: "client_subscriptions/" }), ); }); @@ -69,7 +78,7 @@ describe("ConfigLoaderService", () => { process.env.CLIENT_SUBSCRIPTION_CONFIG_PREFIX = "custom_prefix/"; const service = new ConfigLoaderService(); service.getLoader(); - expect(mockConfigLoader).toHaveBeenCalledWith( + expect(mockConfigSubscriptionCache).toHaveBeenCalledWith( expect.objectContaining({ keyPrefix: "custom_prefix/" }), ); }); @@ -90,7 +99,6 @@ describe("ConfigLoaderService", () => { }); const service = new ConfigLoaderService(); service.reset(customClient); - // Should not throw and the loader should be available immediately expect(() => service.getLoader()).not.toThrow(); }); @@ -101,7 +109,7 @@ describe("ConfigLoaderService", () => { }); const service = new ConfigLoaderService(); service.reset(customClient); - expect(mockConfigLoader).toHaveBeenCalledWith( + expect(mockConfigSubscriptionCache).toHaveBeenCalledWith( expect.objectContaining({ keyPrefix: "custom_prefix/" }), ); }); diff --git a/lambdas/client-transform-filter-lambda/src/__tests__/services/config-loader.test.ts b/lambdas/client-transform-filter-lambda/src/__tests__/services/config-loader.test.ts index a94a5e0c..c9fecdce 100644 --- a/lambdas/client-transform-filter-lambda/src/__tests__/services/config-loader.test.ts +++ b/lambdas/client-transform-filter-lambda/src/__tests__/services/config-loader.test.ts @@ -1,6 +1,6 @@ import { GetObjectCommand, NoSuchKey, S3Client } from "@aws-sdk/client-s3"; import { createMessageStatusConfig } from "__tests__/helpers/client-subscription-fixtures"; -import { ConfigCache } from "@nhs-notify-client-callbacks/config-cache"; +import { ConfigSubscriptionCache } from "@nhs-notify-client-callbacks/config-subscription-cache"; import { ConfigLoader } from "services/config-loader"; import { ConfigValidationError } from "services/validators/config-validator"; @@ -13,6 +13,15 @@ jest.mock("services/logger", () => ({ 
}, })); +jest.mock("@nhs-notify-client-callbacks/logger", () => ({ + logger: { + debug: jest.fn(), + info: jest.fn(), + warn: jest.fn(), + error: jest.fn(), + }, +})); + const mockBody = (json: string) => ({ transformToString: jest.fn().mockResolvedValue(json), }); @@ -20,13 +29,15 @@ const mockBody = (json: string) => ({ const createValidConfig = (clientId: string) => createMessageStatusConfig(["DELIVERED"], clientId); -const createLoader = (send: jest.Mock) => - new ConfigLoader({ +const createLoader = (send: jest.Mock) => { + const cache = new ConfigSubscriptionCache({ + s3Client: { send } as unknown as S3Client, bucketName: "bucket", keyPrefix: "client_subscriptions/", - s3Client: { send } as unknown as S3Client, - cache: new ConfigCache(60_000), + ttlMs: 60_000, }); + return new ConfigLoader(cache); +}; describe("ConfigLoader", () => { it("loads and validates client configuration from S3", async () => { diff --git a/lambdas/client-transform-filter-lambda/src/__tests__/services/config-update.component.test.ts b/lambdas/client-transform-filter-lambda/src/__tests__/services/config-update.component.test.ts index c6e0e532..487e6130 100644 --- a/lambdas/client-transform-filter-lambda/src/__tests__/services/config-update.component.test.ts +++ b/lambdas/client-transform-filter-lambda/src/__tests__/services/config-update.component.test.ts @@ -1,8 +1,17 @@ import { S3Client } from "@aws-sdk/client-s3"; import { createMessageStatusConfig } from "__tests__/helpers/client-subscription-fixtures"; -import { ConfigCache } from "@nhs-notify-client-callbacks/config-cache"; +import { ConfigSubscriptionCache } from "@nhs-notify-client-callbacks/config-subscription-cache"; import { ConfigLoader } from "services/config-loader"; +jest.mock("@nhs-notify-client-callbacks/logger", () => ({ + logger: { + debug: jest.fn(), + info: jest.fn(), + warn: jest.fn(), + error: jest.fn(), + }, +})); + const makeConfig = (messageStatuses: string[]) => createMessageStatusConfig(messageStatuses as 
never); @@ -28,12 +37,13 @@ describe("config update component", () => { }, }); - const loader = new ConfigLoader({ + const cache = new ConfigSubscriptionCache({ + s3Client: { send } as unknown as S3Client, bucketName: "bucket", keyPrefix: "client_subscriptions/", - s3Client: { send } as unknown as S3Client, - cache: new ConfigCache(1000), + ttlMs: 1000, }); + const loader = new ConfigLoader(cache); const first = await loader.loadClientConfig("client-1"); const firstMessage = first?.subscriptions.find( diff --git a/lambdas/client-transform-filter-lambda/src/services/config-loader-service.ts b/lambdas/client-transform-filter-lambda/src/services/config-loader-service.ts index 43f760c8..b5542d01 100644 --- a/lambdas/client-transform-filter-lambda/src/services/config-loader-service.ts +++ b/lambdas/client-transform-filter-lambda/src/services/config-loader-service.ts @@ -1,5 +1,5 @@ import { S3Client } from "@aws-sdk/client-s3"; -import { ConfigCache } from "@nhs-notify-client-callbacks/config-cache"; +import { ConfigSubscriptionCache } from "@nhs-notify-client-callbacks/config-subscription-cache"; import { ConfigLoader } from "services/config-loader"; const DEFAULT_CACHE_TTL_SECONDS = 60; @@ -26,52 +26,49 @@ export const createS3Client = ( }; export class ConfigLoaderService { - private readonly cache: ConfigCache; - private loader: ConfigLoader | undefined; + private cache: ConfigSubscriptionCache | undefined; + + private readonly ttlMs: number; + constructor(cacheTtlMs: number = resolveCacheTtlMs()) { - this.cache = new ConfigCache(cacheTtlMs); + this.ttlMs = cacheTtlMs; } getLoader(): ConfigLoader { - const bucketName = process.env.CLIENT_SUBSCRIPTION_CONFIG_BUCKET; - if (!bucketName) { - throw new Error("CLIENT_SUBSCRIPTION_CONFIG_BUCKET is required"); - } - if (this.loader) { return this.loader; } - this.loader = new ConfigLoader({ - bucketName, - keyPrefix: - process.env.CLIENT_SUBSCRIPTION_CONFIG_PREFIX ?? 
- "client_subscriptions/", - s3Client: createS3Client(), - cache: this.cache, - }); - + this.cache = this.createCache(createS3Client()); + this.loader = new ConfigLoader(this.cache); return this.loader; } reset(s3Client?: S3Client): void { + this.cache?.reset(); this.loader = undefined; - this.cache.clear(); + this.cache = undefined; if (s3Client) { - const bucketName = process.env.CLIENT_SUBSCRIPTION_CONFIG_BUCKET; - if (!bucketName) { - throw new Error("CLIENT_SUBSCRIPTION_CONFIG_BUCKET is required"); - } - this.loader = new ConfigLoader({ - bucketName, - keyPrefix: - process.env.CLIENT_SUBSCRIPTION_CONFIG_PREFIX ?? - "client_subscriptions/", - s3Client, - cache: this.cache, - }); + this.cache = this.createCache(s3Client); + this.loader = new ConfigLoader(this.cache); + } + } + + private createCache(s3Client: S3Client): ConfigSubscriptionCache { + const bucketName = process.env.CLIENT_SUBSCRIPTION_CONFIG_BUCKET; + if (!bucketName) { + throw new Error("CLIENT_SUBSCRIPTION_CONFIG_BUCKET is required"); } + + return new ConfigSubscriptionCache({ + s3Client, + bucketName, + keyPrefix: + process.env.CLIENT_SUBSCRIPTION_CONFIG_PREFIX ?? 
+ "client_subscriptions/", + ttlMs: this.ttlMs, + }); } } diff --git a/lambdas/client-transform-filter-lambda/src/services/config-loader.ts b/lambdas/client-transform-filter-lambda/src/services/config-loader.ts index 76a5380d..0b272774 100644 --- a/lambdas/client-transform-filter-lambda/src/services/config-loader.ts +++ b/lambdas/client-transform-filter-lambda/src/services/config-loader.ts @@ -1,82 +1,21 @@ -import { GetObjectCommand, NoSuchKey, S3Client } from "@aws-sdk/client-s3"; +import type { ConfigSubscriptionCache } from "@nhs-notify-client-callbacks/config-subscription-cache"; import type { ClientSubscriptionConfiguration } from "@nhs-notify-client-callbacks/models"; -import { ConfigCache } from "@nhs-notify-client-callbacks/config-cache"; import { logger } from "services/logger"; import { wrapUnknownError } from "services/error-handler"; -import { - ConfigValidationError, - validateClientConfig, -} from "services/validators/config-validator"; - -type ConfigLoaderOptions = { - bucketName: string; - keyPrefix: string; - s3Client: S3Client; - cache: ConfigCache; -}; - -function throwAsConfigError(error: unknown, clientId: string): never { - if (error instanceof ConfigValidationError) { - logger.error("Config validation failed with schema violations", { - clientId, - validationErrors: error.issues, - }); - throw error; - } - - const { message } = wrapUnknownError(error); - logger.error("Failed to load config from S3", { clientId }); - throw new ConfigValidationError([{ path: "config", message }]); -} +import { ConfigValidationError } from "services/validators/config-validator"; export class ConfigLoader { - constructor(private readonly options: ConfigLoaderOptions) {} + constructor(private readonly cache: ConfigSubscriptionCache) {} async loadClientConfig( clientId: string, ): Promise { - const cached = this.options.cache.get(clientId); - if (cached) { - logger.debug("Config loaded from cache", { clientId, cacheHit: true }); - return cached; - } - - 
logger.debug("Config not in cache, fetching from S3", { - clientId, - cacheHit: false, - }); - try { - const response = await this.options.s3Client.send( - new GetObjectCommand({ - Bucket: this.options.bucketName, - Key: `${this.options.keyPrefix}${clientId}.json`, - }), - ); - - if (!response.Body) { - throw new Error("S3 response body was empty"); - } - - const rawConfig = await response.Body.transformToString(); - const parsedConfig = JSON.parse(rawConfig) as unknown; - const validated = validateClientConfig(parsedConfig); - this.options.cache.set(clientId, validated); - logger.info("Config loaded successfully from S3", { - clientId, - subscriptionCount: validated.subscriptions.length, - }); - return validated; + return await this.cache.loadClientConfig(clientId); } catch (error) { - if (error instanceof NoSuchKey) { - logger.info( - "No config found in S3 for client - events will be filtered out", - { clientId }, - ); - return undefined; - } - throwAsConfigError(error, clientId); - return undefined; + const { message } = wrapUnknownError(error); + logger.error("Failed to load config", { clientId }); + throw new ConfigValidationError([{ path: "config", message }]); } } } diff --git a/lambdas/https-client-lambda/package.json b/lambdas/https-client-lambda/package.json index 19763116..f03e3485 100644 --- a/lambdas/https-client-lambda/package.json +++ b/lambdas/https-client-lambda/package.json @@ -4,7 +4,7 @@ "@aws-sdk/client-secrets-manager": "catalog:aws", "@aws-sdk/client-sqs": "catalog:aws", "@aws-sdk/client-ssm": "catalog:aws", - "@nhs-notify-client-callbacks/config-cache": "workspace:*", + "@nhs-notify-client-callbacks/config-subscription-cache": "workspace:*", "@nhs-notify-client-callbacks/logger": "workspace:*", "@nhs-notify-client-callbacks/models": "workspace:*", "@redis/client": "catalog:app", diff --git a/lambdas/https-client-lambda/src/__tests__/config-loader.test.ts b/lambdas/https-client-lambda/src/__tests__/config-loader.test.ts index 
a8ccdfb4..94ca1d00 100644 --- a/lambdas/https-client-lambda/src/__tests__/config-loader.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/config-loader.test.ts @@ -13,7 +13,7 @@ jest.mock("@aws-sdk/client-s3", () => { }; }); -jest.mock("services/logger", () => ({ +jest.mock("@nhs-notify-client-callbacks/logger", () => ({ logger: { info: jest.fn(), warn: jest.fn(), @@ -33,8 +33,6 @@ const VALID_TARGET = { invocationMethod: "POST" as const, invocationRateLimit: 10, apiKey: { headerName: "x-api-key", headerValue: "secret" }, - mtls: { enabled: false }, - certPinning: { enabled: false }, }; const VALID_CONFIG = { @@ -75,11 +73,17 @@ describe("loadTargetConfig", () => { }); it("rejects config missing required field", async () => { - // eslint-disable-next-line @typescript-eslint/naming-convention, sonarjs/no-unused-vars -- destructuring to exclude mtls - const { mtls: _unusedMtls, ...targetWithoutMtls } = VALID_TARGET; const invalidConfig = { ...VALID_CONFIG, - targets: [targetWithoutMtls], + targets: [ + { + type: VALID_TARGET.type, + invocationEndpoint: VALID_TARGET.invocationEndpoint, + invocationMethod: VALID_TARGET.invocationMethod, + invocationRateLimit: VALID_TARGET.invocationRateLimit, + apiKey: VALID_TARGET.apiKey, + }, + ], }; mockS3Send.mockResolvedValue(makeS3Response(invalidConfig)); diff --git a/lambdas/https-client-lambda/src/__tests__/delivery-metrics.test.ts b/lambdas/https-client-lambda/src/__tests__/delivery-metrics.test.ts index f8142060..803c19bb 100644 --- a/lambdas/https-client-lambda/src/__tests__/delivery-metrics.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/delivery-metrics.test.ts @@ -1,6 +1,6 @@ const mockCreateMetricsLogger = jest.fn(); jest.mock("aws-embedded-metrics", () => ({ - Unit: { Count: "Count" }, + Unit: { Count: "Count", Milliseconds: "Milliseconds" }, createMetricsLogger: () => mockCreateMetricsLogger(), })); @@ -154,6 +154,70 @@ describe("delivery-metrics", () => { ); }); + it("emitCircuitBreakerClosed emits 
correct metric", async () => { + // @ts-expect-error -- modulePaths resolves at runtime + const mod = await import("services/delivery-metrics"); + const { emitCircuitBreakerClosed } = mod; + + emitCircuitBreakerClosed("target-42"); + + expect(mockMetrics.putMetric).toHaveBeenCalledWith( + "CircuitBreakerClosed", + 1, + "Count", + ); + }); + + it("emitRetryWindowExhausted emits correct metric", async () => { + // @ts-expect-error -- modulePaths resolves at runtime + const mod = await import("services/delivery-metrics"); + const { emitRetryWindowExhausted } = mod; + + emitRetryWindowExhausted("target-42"); + + expect(mockMetrics.putMetric).toHaveBeenCalledWith( + "DeliveryRetryWindowExhausted", + 1, + "Count", + ); + }); + + it("emitAdmissionDenied emits correct metric with reason", async () => { + // @ts-expect-error -- modulePaths resolves at runtime + const mod = await import("services/delivery-metrics"); + const { emitAdmissionDenied } = mod; + + emitAdmissionDenied("target-42", "rate_limited"); + + expect(mockMetrics.setProperty).toHaveBeenCalledWith( + "targetId", + "target-42", + ); + expect(mockMetrics.setProperty).toHaveBeenCalledWith( + "reason", + "rate_limited", + ); + expect(mockMetrics.putMetric).toHaveBeenCalledWith( + "AdmissionDenied", + 1, + "Count", + ); + }); + + it("emitDeliveryDuration emits correct metric", async () => { + // @ts-expect-error -- modulePaths resolves at runtime + const mod = await import("services/delivery-metrics"); + const { emitDeliveryDuration } = mod; + + emitDeliveryDuration("target-42", 250); + + expect(mockMetrics.putMetric).toHaveBeenCalledWith( + "DeliveryDurationMs", + 250, + "Milliseconds", + ); + }); + it("flushMetrics calls flush on the instance", async () => { // @ts-expect-error -- modulePaths resolves at runtime const mod = await import("services/delivery-metrics"); diff --git a/lambdas/https-client-lambda/src/__tests__/delivery-observability.test.ts 
b/lambdas/https-client-lambda/src/__tests__/delivery-observability.test.ts index 8f0b943b..c4c3ab5f 100644 --- a/lambdas/https-client-lambda/src/__tests__/delivery-observability.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/delivery-observability.test.ts @@ -1,13 +1,17 @@ import { + recordAdmissionDenied, + recordCircuitBreakerClosed, recordCircuitBreakerOpen, recordDeliveryAttempt, + recordDeliveryDuration, recordDeliveryFailure, recordDeliveryPermanentFailure, recordDeliveryRateLimited, recordDeliverySuccess, + recordRetryWindowExhausted, } from "services/delivery-observability"; -jest.mock("services/logger", () => ({ +jest.mock("@nhs-notify-client-callbacks/logger", () => ({ logger: { info: jest.fn(), warn: jest.fn(), @@ -16,12 +20,16 @@ jest.mock("services/logger", () => ({ })); jest.mock("services/delivery-metrics", () => ({ + emitAdmissionDenied: jest.fn(), + emitCircuitBreakerClosed: jest.fn(), + emitCircuitBreakerOpen: jest.fn(), emitDeliveryAttempt: jest.fn(), - emitDeliverySuccess: jest.fn(), + emitDeliveryDuration: jest.fn(), emitDeliveryFailure: jest.fn(), emitDeliveryPermanentFailure: jest.fn(), - emitCircuitBreakerOpen: jest.fn(), + emitDeliverySuccess: jest.fn(), emitRateLimited: jest.fn(), + emitRetryWindowExhausted: jest.fn(), })); describe("delivery-observability", () => { @@ -29,7 +37,7 @@ describe("delivery-observability", () => { const { emitDeliveryAttempt } = jest.requireMock( "services/delivery-metrics", ); - const { logger } = jest.requireMock("services/logger"); + const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); recordDeliveryAttempt("client-1", "target-1"); @@ -44,7 +52,7 @@ describe("delivery-observability", () => { const { emitDeliverySuccess } = jest.requireMock( "services/delivery-metrics", ); - const { logger } = jest.requireMock("services/logger"); + const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); recordDeliverySuccess("client-1", "target-1"); @@ -59,7 +67,7 @@ 
describe("delivery-observability", () => { const { emitDeliveryPermanentFailure } = jest.requireMock( "services/delivery-metrics", ); - const { logger } = jest.requireMock("services/logger"); + const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); recordDeliveryPermanentFailure("client-1", "target-1"); @@ -72,7 +80,7 @@ describe("delivery-observability", () => { it("recordDeliveryRateLimited emits metric and logs", () => { const { emitRateLimited } = jest.requireMock("services/delivery-metrics"); - const { logger } = jest.requireMock("services/logger"); + const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); recordDeliveryRateLimited("client-1", "target-1"); @@ -87,7 +95,7 @@ describe("delivery-observability", () => { const { emitDeliveryFailure } = jest.requireMock( "services/delivery-metrics", ); - const { logger } = jest.requireMock("services/logger"); + const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); recordDeliveryFailure("client-1", "target-1", 503, 30); @@ -103,13 +111,80 @@ describe("delivery-observability", () => { ); }); - it("recordCircuitBreakerOpen emits metric", () => { + it("recordCircuitBreakerOpen emits metric and logs", () => { const { emitCircuitBreakerOpen } = jest.requireMock( "services/delivery-metrics", ); + const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); recordCircuitBreakerOpen("target-1"); expect(emitCircuitBreakerOpen).toHaveBeenCalledWith("target-1"); + expect(logger.warn).toHaveBeenCalledWith( + "Circuit breaker opened", + expect.objectContaining({ targetId: "target-1" }), + ); + }); + + it("recordCircuitBreakerClosed emits metric and logs", () => { + const { emitCircuitBreakerClosed } = jest.requireMock( + "services/delivery-metrics", + ); + const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); + + recordCircuitBreakerClosed("target-1"); + + expect(emitCircuitBreakerClosed).toHaveBeenCalledWith("target-1"); + 
expect(logger.info).toHaveBeenCalledWith( + "Circuit breaker closed", + expect.objectContaining({ targetId: "target-1" }), + ); + }); + + it("recordRetryWindowExhausted emits metric and logs", () => { + const { emitRetryWindowExhausted } = jest.requireMock( + "services/delivery-metrics", + ); + const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); + + recordRetryWindowExhausted("client-1", "target-1"); + + expect(emitRetryWindowExhausted).toHaveBeenCalledWith("target-1"); + expect(logger.warn).toHaveBeenCalledWith( + "Retry window exhausted — sending to DLQ", + expect.objectContaining({ clientId: "client-1", targetId: "target-1" }), + ); + }); + + it("recordAdmissionDenied emits metric and logs", () => { + const { emitAdmissionDenied } = jest.requireMock( + "services/delivery-metrics", + ); + const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); + + recordAdmissionDenied("client-1", "target-1", "rate_limited"); + + expect(emitAdmissionDenied).toHaveBeenCalledWith( + "target-1", + "rate_limited", + ); + expect(logger.warn).toHaveBeenCalledWith( + "Admission denied", + expect.objectContaining({ + clientId: "client-1", + targetId: "target-1", + reason: "rate_limited", + }), + ); + }); + + it("recordDeliveryDuration emits metric", () => { + const { emitDeliveryDuration } = jest.requireMock( + "services/delivery-metrics", + ); + + recordDeliveryDuration("target-1", 250); + + expect(emitDeliveryDuration).toHaveBeenCalledWith("target-1", 250); }); }); diff --git a/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts b/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts index 68280a5e..cc42a311 100644 --- a/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts @@ -7,7 +7,7 @@ import { resetRedisClient, } from "services/endpoint-gate"; -jest.mock("services/logger"); +jest.mock("@nhs-notify-client-callbacks/logger"); const 
mockSendCommand = jest.fn(); const mockConnect = jest.fn().mockResolvedValue(undefined); diff --git a/lambdas/https-client-lambda/src/__tests__/fixtures/handler-fixtures.ts b/lambdas/https-client-lambda/src/__tests__/fixtures/handler-fixtures.ts index cf262c7b..3a93b63f 100644 --- a/lambdas/https-client-lambda/src/__tests__/fixtures/handler-fixtures.ts +++ b/lambdas/https-client-lambda/src/__tests__/fixtures/handler-fixtures.ts @@ -7,8 +7,9 @@ export const DEFAULT_TARGET = { invocationMethod: "POST" as const, invocationRateLimit: 10, apiKey: { headerName: "x-api-key", headerValue: "secret-key" }, - mtls: { enabled: true }, - certPinning: { enabled: false }, + delivery: { + mtls: { enabled: true }, + }, }; export const makeRecord = (overrides: Partial = {}): SQSRecord => ({ diff --git a/lambdas/https-client-lambda/src/__tests__/handler.test.ts b/lambdas/https-client-lambda/src/__tests__/handler.test.ts index 7c48327d..4f665229 100644 --- a/lambdas/https-client-lambda/src/__tests__/handler.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/handler.test.ts @@ -4,7 +4,7 @@ import { makeRecord, } from "__tests__/fixtures/handler-fixtures"; -jest.mock("services/logger", () => ({ +jest.mock("@nhs-notify-client-callbacks/logger", () => ({ logger: { info: jest.fn(), warn: jest.fn(), @@ -68,12 +68,16 @@ jest.mock("services/endpoint-gate", () => ({ })); jest.mock("services/delivery-metrics", () => ({ + emitAdmissionDenied: jest.fn(), + emitCircuitBreakerClosed: jest.fn(), + emitCircuitBreakerOpen: jest.fn(), emitDeliveryAttempt: jest.fn(), - emitDeliverySuccess: jest.fn(), + emitDeliveryDuration: jest.fn(), emitDeliveryFailure: jest.fn(), emitDeliveryPermanentFailure: jest.fn(), - emitCircuitBreakerOpen: jest.fn(), + emitDeliverySuccess: jest.fn(), emitRateLimited: jest.fn(), + emitRetryWindowExhausted: jest.fn(), flushMetrics: jest.fn().mockResolvedValue(undefined), })); @@ -88,7 +92,7 @@ describe("processRecords", () => { 
mockGetApplicationId.mockResolvedValue("app-id-1"); mockSignPayload.mockReturnValue("signature-abc"); mockBuildAgent.mockResolvedValue(mockAgent); - mockDeliverPayload.mockResolvedValue({ ok: true }); + mockDeliverPayload.mockResolvedValue({ outcome: "success" }); mockSendToDlq.mockResolvedValue(undefined); mockChangeVisibility.mockResolvedValue(undefined); mockJitteredBackoff.mockReturnValue(5); @@ -126,7 +130,7 @@ describe("processRecords", () => { }); it("sends permanent failure to DLQ and returns no failure", async () => { - mockDeliverPayload.mockResolvedValue({ ok: false, permanent: true }); + mockDeliverPayload.mockResolvedValue({ outcome: "permanent_failure" }); const failures = await processRecords([makeRecord()]); @@ -136,8 +140,7 @@ describe("processRecords", () => { it("returns failure for transient 5xx errors", async () => { mockDeliverPayload.mockResolvedValue({ - ok: false, - permanent: false, + outcome: "transient_failure", statusCode: 503, }); @@ -148,9 +151,7 @@ describe("processRecords", () => { it("returns failure for 429 rate-limited responses", async () => { mockDeliverPayload.mockResolvedValue({ - ok: false, - permanent: false, - statusCode: 429, + outcome: "rate_limited", retryAfterHeader: "60", }); @@ -171,10 +172,9 @@ describe("processRecords", () => { const record2 = makeRecord({ messageId: "msg-2" }); mockDeliverPayload - .mockResolvedValueOnce({ ok: true }) + .mockResolvedValueOnce({ outcome: "success" }) .mockResolvedValueOnce({ - ok: false, - permanent: false, + outcome: "transient_failure", statusCode: 500, }); @@ -220,8 +220,7 @@ describe("processRecords", () => { it("calls changeVisibility with backoff on 5xx then throws", async () => { mockDeliverPayload.mockResolvedValue({ - ok: false, - permanent: false, + outcome: "transient_failure", statusCode: 503, }); @@ -233,9 +232,7 @@ describe("processRecords", () => { it("delegates 429 handling to handleRateLimitedRecord", async () => { mockDeliverPayload.mockResolvedValue({ - ok: 
false, - permanent: false, - statusCode: 429, + outcome: "rate_limited", retryAfterHeader: "120", }); @@ -327,8 +324,7 @@ describe("processRecords", () => { }; mockLoadTargetConfig.mockResolvedValue(targetCb); mockDeliverPayload.mockResolvedValue({ - ok: false, - permanent: false, + outcome: "transient_failure", statusCode: 503, }); @@ -359,9 +355,7 @@ describe("processRecords", () => { it("does not call recordResult on 429 path", async () => { mockDeliverPayload.mockResolvedValue({ - ok: false, - permanent: false, - statusCode: 429, + outcome: "rate_limited", retryAfterHeader: "60", }); @@ -389,8 +383,7 @@ describe("processRecords", () => { }; mockLoadTargetConfig.mockResolvedValue(targetCb); mockDeliverPayload.mockResolvedValue({ - ok: false, - permanent: false, + outcome: "transient_failure", statusCode: 503, }); mockRecordResult.mockResolvedValue({ ok: false, state: "opened" }); @@ -411,8 +404,7 @@ describe("processRecords", () => { }; mockLoadTargetConfig.mockResolvedValue(targetCb); mockDeliverPayload.mockResolvedValue({ - ok: false, - permanent: false, + outcome: "transient_failure", statusCode: 503, }); mockRecordResult.mockResolvedValue({ ok: true, state: "closed" }); @@ -428,9 +420,7 @@ describe("processRecords", () => { it("emits RateLimited metric on 429 response", async () => { mockDeliverPayload.mockResolvedValue({ - ok: false, - permanent: false, - statusCode: 429, + outcome: "rate_limited", retryAfterHeader: "60", }); diff --git a/lambdas/https-client-lambda/src/__tests__/https-client.test.ts b/lambdas/https-client-lambda/src/__tests__/https-client.test.ts index 5bfa2864..bb1c0d11 100644 --- a/lambdas/https-client-lambda/src/__tests__/https-client.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/https-client.test.ts @@ -23,8 +23,6 @@ const createTarget = (): CallbackTarget => ({ invocationMethod: "POST", invocationRateLimit: 10, apiKey: { headerName: "x-api-key", headerValue: "secret" }, - mtls: { enabled: false }, - certPinning: { enabled: 
false }, }); const createMockAgent = () => ({}) as Agent; @@ -113,7 +111,7 @@ describe("deliverPayload", () => { jest.restoreAllMocks(); }); - it("returns ok: true on 2xx", async () => { + it("returns success on 2xx", async () => { mockHttpsRequest(200); const result = await deliverPayload( @@ -123,10 +121,10 @@ describe("deliverPayload", () => { createMockAgent(), ); - expect(result).toEqual({ ok: true }); + expect(result).toEqual({ outcome: "success" }); }); - it("returns permanent: true on 4xx non-429", async () => { + it("returns permanent_failure on 4xx non-429", async () => { mockHttpsRequest(400); const result = await deliverPayload( @@ -136,10 +134,10 @@ describe("deliverPayload", () => { createMockAgent(), ); - expect(result).toEqual({ ok: false, permanent: true }); + expect(result).toEqual({ outcome: "permanent_failure" }); }); - it("returns permanent: true on TLS error CERT_HAS_EXPIRED", async () => { + it("returns permanent_failure on TLS error CERT_HAS_EXPIRED", async () => { mockHttpsRequestError("CERT_HAS_EXPIRED"); const result = await deliverPayload( @@ -149,10 +147,10 @@ describe("deliverPayload", () => { createMockAgent(), ); - expect(result).toEqual({ ok: false, permanent: true }); + expect(result).toEqual({ outcome: "permanent_failure" }); }); - it("returns permanent: true on TLS pinning error", async () => { + it("returns permanent_failure on TLS pinning error", async () => { mockHttpsRequestError("ERR_CERT_PINNING_FAILED"); const result = await deliverPayload( @@ -162,10 +160,10 @@ describe("deliverPayload", () => { createMockAgent(), ); - expect(result).toEqual({ ok: false, permanent: true }); + expect(result).toEqual({ outcome: "permanent_failure" }); }); - it("returns ok: false, permanent: false on 5xx", async () => { + it("returns transient_failure on 5xx", async () => { mockHttpsRequest(503); const result = await deliverPayload( @@ -175,10 +173,10 @@ describe("deliverPayload", () => { createMockAgent(), ); - expect(result).toEqual({ ok: 
false, permanent: false, statusCode: 503 }); + expect(result).toEqual({ outcome: "transient_failure", statusCode: 503 }); }); - it("returns 429 with Retry-After header value", async () => { + it("returns rate_limited with Retry-After header value", async () => { mockHttpsRequest(429, { "retry-after": "60" }); const result = await deliverPayload( @@ -189,14 +187,12 @@ describe("deliverPayload", () => { ); expect(result).toEqual({ - ok: false, - permanent: false, - statusCode: 429, + outcome: "rate_limited", retryAfterHeader: "60", }); }); - it("returns 429 with undefined retryAfterHeader when header is absent", async () => { + it("returns rate_limited with undefined retryAfterHeader when header is absent", async () => { mockHttpsRequest(429); const result = await deliverPayload( @@ -207,14 +203,12 @@ describe("deliverPayload", () => { ); expect(result).toEqual({ - ok: false, - permanent: false, - statusCode: 429, + outcome: "rate_limited", retryAfterHeader: undefined, }); }); - it("returns ok: false, permanent: false on TCP error", async () => { + it("returns transient_failure on TCP error", async () => { mockHttpsRequestError("ECONNREFUSED"); const result = await deliverPayload( @@ -224,7 +218,7 @@ describe("deliverPayload", () => { createMockAgent(), ); - expect(result).toEqual({ ok: false, permanent: false, statusCode: 0 }); + expect(result).toEqual({ outcome: "transient_failure", statusCode: 0 }); }); it("uses port 443 when URL has no explicit port", async () => { @@ -239,7 +233,7 @@ describe("deliverPayload", () => { createMockAgent(), ); - expect(result).toEqual({ ok: true }); + expect(result).toEqual({ outcome: "success" }); const callUrl = (https.request as jest.Mock).mock.calls[0][0] as URL; expect(callUrl).toBeInstanceOf(URL); expect(callUrl.port).toBe(""); @@ -255,6 +249,6 @@ describe("deliverPayload", () => { createMockAgent(), ); - expect(result).toEqual({ ok: false, permanent: false, statusCode: 0 }); + expect(result).toEqual({ outcome: 
"transient_failure", statusCode: 0 }); }); }); diff --git a/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts b/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts index ed9fbf9a..97bb734b 100644 --- a/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts @@ -17,23 +17,23 @@ jest.mock("services/sqs-visibility", () => ({ changeVisibility: (...args: unknown[]) => mockChangeVisibility(...args), })); -jest.mock("services/logger", () => ({ +jest.mock("@nhs-notify-client-callbacks/logger", () => ({ logger: { info: jest.fn(), warn: jest.fn(), error: jest.fn() }, })); describe("jitteredBackoffSeconds", () => { - it("produces value in [0, 5) at receiveCount=1", () => { + it("produces value in [1, 5) at receiveCount=1", () => { for (let i = 0; i < 100; i++) { const val = jitteredBackoffSeconds(1); - expect(val).toBeGreaterThanOrEqual(0); + expect(val).toBeGreaterThanOrEqual(1); expect(val).toBeLessThan(5); } }); - it("produces value in [0, 300) at receiveCount=10 (cap)", () => { + it("produces value in [1, 300) at receiveCount=10 (cap)", () => { for (let i = 0; i < 100; i++) { const val = jitteredBackoffSeconds(10); - expect(val).toBeGreaterThanOrEqual(0); + expect(val).toBeGreaterThanOrEqual(1); expect(val).toBeLessThan(300); } }); diff --git a/lambdas/https-client-lambda/src/__tests__/ssm-applications-map.test.ts b/lambdas/https-client-lambda/src/__tests__/ssm-applications-map.test.ts index 40592928..059023d1 100644 --- a/lambdas/https-client-lambda/src/__tests__/ssm-applications-map.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/ssm-applications-map.test.ts @@ -13,7 +13,7 @@ jest.mock("@aws-sdk/client-ssm", () => { }; }); -jest.mock("services/logger", () => ({ +jest.mock("@nhs-notify-client-callbacks/logger", () => ({ logger: { info: jest.fn(), warn: jest.fn(), diff --git a/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts 
b/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts index 6b306234..3f6f27ca 100644 --- a/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts @@ -20,7 +20,7 @@ jest.mock("@aws-sdk/client-secrets-manager", () => { }; }); -jest.mock("services/logger", () => ({ +jest.mock("@nhs-notify-client-callbacks/logger", () => ({ logger: { info: jest.fn(), warn: jest.fn(), @@ -80,8 +80,6 @@ const createTarget = ( invocationMethod: "POST", invocationRateLimit: 10, apiKey: { headerName: "x-api-key", headerValue: "secret" }, - mtls: { enabled: false }, - certPinning: { enabled: false }, ...overrides, }); @@ -115,14 +113,16 @@ describe("tls-agent-factory", () => { it("builds agent with key and cert when mtls is enabled", async () => { mockS3PemResponse(COMBINED_PEM); - const agent = await buildAgent(createTarget({ mtls: { enabled: true } })); + const agent = await buildAgent( + createTarget({ delivery: { mtls: { enabled: true } } }), + ); expect(agent).toBeDefined(); expect(agent.options.keepAlive).toBe(false); }); it("builds agent without key and cert when mtls is disabled", async () => { - const agent = await buildAgent(createTarget({ mtls: { enabled: false } })); + const agent = await buildAgent(createTarget()); expect(agent).toBeDefined(); expect(mockS3Send).not.toHaveBeenCalled(); @@ -148,7 +148,7 @@ describe("tls-agent-factory", () => { }); const agent = await mod.buildAgent( - createTarget({ mtls: { enabled: true } }), + createTarget({ delivery: { mtls: { enabled: true } } }), ); expect(agent).toBeDefined(); @@ -157,7 +157,7 @@ describe("tls-agent-factory", () => { it("loads cert from S3 in non-production", async () => { mockS3PemResponse(COMBINED_PEM); - await buildAgent(createTarget({ mtls: { enabled: true } })); + await buildAgent(createTarget({ delivery: { mtls: { enabled: true } } })); expect(mockS3Send).toHaveBeenCalledTimes(1); 
expect(mockSecretsManagerSend).not.toHaveBeenCalled(); @@ -175,7 +175,7 @@ describe("tls-agent-factory", () => { }); const agent = await mod.buildAgent( - createTarget({ mtls: { enabled: true } }), + createTarget({ delivery: { mtls: { enabled: true } } }), ); expect(agent).toBeDefined(); @@ -185,7 +185,7 @@ describe("tls-agent-factory", () => { it("caches cert material on subsequent calls", async () => { mockS3PemResponse(COMBINED_PEM); - const target = createTarget({ mtls: { enabled: true } }); + const target = createTarget({ delivery: { mtls: { enabled: true } } }); await buildAgent(target); await buildAgent(target); @@ -203,7 +203,7 @@ describe("tls-agent-factory", () => { it("resets cached material via resetCache", async () => { mockS3PemResponse(COMBINED_PEM); - const target = createTarget({ mtls: { enabled: true } }); + const target = createTarget({ delivery: { mtls: { enabled: true } } }); await buildAgent(target); resetCache(); @@ -222,7 +222,7 @@ describe("tls-agent-factory", () => { mockSecretsManagerSend.mockResolvedValue({ SecretString: undefined }); await expect( - mod.buildAgent(createTarget({ mtls: { enabled: true } })), + mod.buildAgent(createTarget({ delivery: { mtls: { enabled: true } } })), ).rejects.toThrow("mTLS cert secret has no value"); }); @@ -234,7 +234,7 @@ describe("tls-agent-factory", () => { const mod = await import("services/delivery/tls-agent-factory"); await expect( - mod.buildAgent(createTarget({ mtls: { enabled: true } })), + mod.buildAgent(createTarget({ delivery: { mtls: { enabled: true } } })), ).rejects.toThrow( "MTLS_TEST_CERT_S3_BUCKET and MTLS_TEST_CERT_S3_KEY are required", ); @@ -244,15 +244,19 @@ describe("tls-agent-factory", () => { mockS3Send.mockResolvedValue({ Body: undefined }); await expect( - buildAgent(createTarget({ mtls: { enabled: true } })), + buildAgent(createTarget({ delivery: { mtls: { enabled: true } } })), ).rejects.toThrow("has no body"); }); it("builds agent with checkServerIdentity when certPinning is 
enabled", async () => { mockS3PemResponse(COMBINED_PEM); const target = createTarget({ - mtls: { enabled: true }, - certPinning: { enabled: true, spkiHash: "abc123" }, + delivery: { + mtls: { + enabled: true, + certPinning: { enabled: true, spkiHash: "abc123" }, + }, + }, }); const agent = await buildAgent(target); @@ -264,8 +268,12 @@ describe("tls-agent-factory", () => { it("checkServerIdentity returns error when SPKI hash does not match", async () => { mockS3PemResponse(COMBINED_PEM); const target = createTarget({ - mtls: { enabled: true }, - certPinning: { enabled: true, spkiHash: "expected-hash" }, + delivery: { + mtls: { + enabled: true, + certPinning: { enabled: true, spkiHash: "expected-hash" }, + }, + }, }); const agent = await buildAgent(target); @@ -297,8 +305,12 @@ describe("tls-agent-factory", () => { mockS3PemResponse(COMBINED_PEM); const target = createTarget({ - mtls: { enabled: true }, - certPinning: { enabled: true, spkiHash: expectedHash }, + delivery: { + mtls: { + enabled: true, + certPinning: { enabled: true, spkiHash: expectedHash }, + }, + }, }); const agent = await buildAgent(target); @@ -321,8 +333,12 @@ describe("tls-agent-factory", () => { it("checkServerIdentity returns default error when hostname does not match", async () => { mockS3PemResponse(COMBINED_PEM); const target = createTarget({ - mtls: { enabled: true }, - certPinning: { enabled: true, spkiHash: "abc" }, + delivery: { + mtls: { + enabled: true, + certPinning: { enabled: true, spkiHash: "abc" }, + }, + }, }); const agent = await buildAgent(target); @@ -344,7 +360,7 @@ describe("tls-agent-factory", () => { }); it("does not load cert material when mtls is disabled", async () => { - const agent = await buildAgent(createTarget({ mtls: { enabled: false } })); + const agent = await buildAgent(createTarget()); expect(agent).toBeDefined(); expect(mockS3Send).not.toHaveBeenCalled(); @@ -353,7 +369,12 @@ describe("tls-agent-factory", () => { it("throws when certPinning.enabled is true 
but spkiHash is missing", async () => { const target = createTarget({ - certPinning: { enabled: true }, + delivery: { + mtls: { + enabled: true, + certPinning: { enabled: true }, + }, + }, }); await expect(buildAgent(target)).rejects.toThrow( diff --git a/lambdas/https-client-lambda/src/handler.ts b/lambdas/https-client-lambda/src/handler.ts index 95c28a89..cf12c56a 100644 --- a/lambdas/https-client-lambda/src/handler.ts +++ b/lambdas/https-client-lambda/src/handler.ts @@ -1,7 +1,7 @@ import type { SQSBatchItemFailure, SQSRecord } from "aws-lambda"; import type { ClientCallbackPayload } from "@nhs-notify-client-callbacks/models"; import pMap from "p-map"; -import { logger } from "services/logger"; +import { logger } from "@nhs-notify-client-callbacks/logger"; import { loadTargetConfig } from "services/config-loader"; import { getApplicationId } from "services/ssm-applications-map"; import { signPayload } from "services/payload-signer"; @@ -22,12 +22,16 @@ import { recordResult, } from "services/endpoint-gate"; import { + recordAdmissionDenied, + recordCircuitBreakerClosed, recordCircuitBreakerOpen, recordDeliveryAttempt, + recordDeliveryDuration, recordDeliveryFailure, recordDeliveryPermanentFailure, recordDeliveryRateLimited, recordDeliverySuccess, + recordRetryWindowExhausted, } from "services/delivery-observability"; import { flushMetrics } from "services/delivery-metrics"; @@ -70,12 +74,7 @@ async function checkAdmission( if (!gateResult.allowed) { const delaySec = Math.ceil(gateResult.retryAfterMs / 1000); - logger.warn(`Admission denied: ${gateResult.reason} — requeuing`, { - clientId, - targetId, - reason: gateResult.reason, - delaySec, - }); + recordAdmissionDenied(clientId, targetId, gateResult.reason); await changeVisibility(record.receiptHandle, delaySec); throw new Error(`Admission denied: ${gateResult.reason}`); } @@ -89,23 +88,25 @@ async function handleDeliveryResult( targetId: string, cbEnabled: boolean, ): Promise { - if (result.ok) { + if 
(result.outcome === "success") { if (cbEnabled) { - await recordResult(redis, targetId, true, gateConfig); + const cbOutcome = await recordResult(redis, targetId, true, gateConfig); + if (cbOutcome.ok && cbOutcome.state === "closed") { + recordCircuitBreakerClosed(targetId); + } } recordDeliverySuccess(clientId, targetId); return; } - if (result.permanent) { + if (result.outcome === "permanent_failure") { recordDeliveryPermanentFailure(clientId, targetId); await sendToDlq(record.body); return; } - const receiveCount = Number(record.attributes.ApproximateReceiveCount); - - if ("retryAfterHeader" in result) { + if (result.outcome === "rate_limited") { + const receiveCount = Number(record.attributes.ApproximateReceiveCount); recordDeliveryRateLimited(clientId, targetId); await handleRateLimitedRecord( record, @@ -117,6 +118,7 @@ async function handleDeliveryResult( return; } + const receiveCount = Number(record.attributes.ApproximateReceiveCount); const backoffSec = jitteredBackoffSeconds(receiveCount); if (cbEnabled) { const cbOutcome = await recordResult(redis, targetId, false, gateConfig); @@ -154,10 +156,7 @@ async function processRecord( ); if (isWindowExhausted(firstReceivedMs, maxRetryDurationMs)) { - logger.warn("Retry window exhausted — sending to DLQ", { - clientId: CLIENT_ID, - targetId, - }); + recordRetryWindowExhausted(CLIENT_ID, targetId); await sendToDlq(record.body); return; } @@ -183,7 +182,9 @@ async function processRecord( const payloadJson = JSON.stringify(payload); recordDeliveryAttempt(CLIENT_ID, targetId); + const deliveryStart = Date.now(); const result = await deliverPayload(target, payloadJson, signature, agent); + recordDeliveryDuration(targetId, Date.now() - deliveryStart); await handleDeliveryResult( result, diff --git a/lambdas/https-client-lambda/src/services/config-loader.ts b/lambdas/https-client-lambda/src/services/config-loader.ts index c8f9e714..11aa7c34 100644 --- a/lambdas/https-client-lambda/src/services/config-loader.ts +++ 
b/lambdas/https-client-lambda/src/services/config-loader.ts @@ -1,19 +1,30 @@ -import { GetObjectCommand, S3Client } from "@aws-sdk/client-s3"; -import { - type CallbackTarget, - parseClientSubscriptionConfiguration, -} from "@nhs-notify-client-callbacks/models"; -import { ConfigCache } from "@nhs-notify-client-callbacks/config-cache"; -import { logger } from "services/logger"; +import { S3Client } from "@aws-sdk/client-s3"; +import type { CallbackTarget } from "@nhs-notify-client-callbacks/models"; +import { ConfigSubscriptionCache } from "@nhs-notify-client-callbacks/config-subscription-cache"; const s3Client = new S3Client({}); -let cache: ConfigCache | undefined; +let cache: ConfigSubscriptionCache | undefined; -function getCache(): ConfigCache { +function getCache(): ConfigSubscriptionCache { if (!cache) { - const ttlSeconds = - Number(process.env.CLIENT_SUBSCRIPTION_CACHE_TTL_SECONDS) || 300; - cache = new ConfigCache(ttlSeconds * 1000); + const { + CLIENT_SUBSCRIPTION_CONFIG_BUCKET, + CLIENT_SUBSCRIPTION_CONFIG_PREFIX, + } = process.env; + if (!CLIENT_SUBSCRIPTION_CONFIG_BUCKET) { + throw new Error("CLIENT_SUBSCRIPTION_CONFIG_BUCKET is required"); + } + + const ttlMs = + Number(process.env.CLIENT_SUBSCRIPTION_CACHE_TTL_SECONDS) * 1000 || + 300_000; + + cache = new ConfigSubscriptionCache({ + s3Client, + bucketName: CLIENT_SUBSCRIPTION_CONFIG_BUCKET, + keyPrefix: CLIENT_SUBSCRIPTION_CONFIG_PREFIX ?? 
"client_subscriptions/", + ttlMs, + }); } return cache; } @@ -26,43 +37,10 @@ export async function loadTargetConfig( clientId: string, targetId: string, ): Promise { - let clientConfig = getCache().get(clientId); + const clientConfig = await getCache().loadClientConfig(clientId); if (!clientConfig) { - const { - CLIENT_SUBSCRIPTION_CONFIG_BUCKET, - CLIENT_SUBSCRIPTION_CONFIG_PREFIX, - } = process.env; - if (!CLIENT_SUBSCRIPTION_CONFIG_BUCKET) { - throw new Error("CLIENT_SUBSCRIPTION_CONFIG_BUCKET is required"); - } - - const prefix = CLIENT_SUBSCRIPTION_CONFIG_PREFIX ?? "client_subscriptions/"; - - const response = await s3Client.send( - new GetObjectCommand({ - Bucket: CLIENT_SUBSCRIPTION_CONFIG_BUCKET, - Key: `${prefix}${clientId}.json`, - }), - ); - - if (!response.Body) { - throw new Error(`S3 response body was empty for client '${clientId}'`); - } - - const raw = await response.Body.transformToString(); - const parsed = JSON.parse(raw) as unknown; - const result = parseClientSubscriptionConfiguration(parsed); - - if (!result.success) { - throw new Error( - `Invalid client config for '${clientId}': ${result.error.message}`, - ); - } - - clientConfig = result.data; - getCache().set(clientId, clientConfig); - logger.info("Client config loaded from S3", { clientId }); + throw new Error(`No configuration found for client '${clientId}'`); } const target = clientConfig.targets.find((t) => t.targetId === targetId); diff --git a/lambdas/https-client-lambda/src/services/delivery-metrics.ts b/lambdas/https-client-lambda/src/services/delivery-metrics.ts index f9a6bc49..68248591 100644 --- a/lambdas/https-client-lambda/src/services/delivery-metrics.ts +++ b/lambdas/https-client-lambda/src/services/delivery-metrics.ts @@ -61,6 +61,34 @@ export function emitCircuitBreakerOpen(targetId: string): void { metrics.putMetric("CircuitBreakerOpen", 1, Unit.Count); } +export function emitCircuitBreakerClosed(targetId: string): void { + const metrics = getMetrics(); + 
metrics.setProperty("targetId", targetId); + metrics.putMetric("CircuitBreakerClosed", 1, Unit.Count); +} + +export function emitRetryWindowExhausted(targetId: string): void { + const metrics = getMetrics(); + metrics.setProperty("targetId", targetId); + metrics.putMetric("DeliveryRetryWindowExhausted", 1, Unit.Count); +} + +export function emitAdmissionDenied(targetId: string, reason: string): void { + const metrics = getMetrics(); + metrics.setProperty("targetId", targetId); + metrics.setProperty("reason", reason); + metrics.putMetric("AdmissionDenied", 1, Unit.Count); +} + +export function emitDeliveryDuration( + targetId: string, + durationMs: number, +): void { + const metrics = getMetrics(); + metrics.setProperty("targetId", targetId); + metrics.putMetric("DeliveryDurationMs", durationMs, Unit.Milliseconds); +} + export async function flushMetrics(): Promise { if (metricsInstance) { await metricsInstance.flush(); diff --git a/lambdas/https-client-lambda/src/services/delivery-observability.ts b/lambdas/https-client-lambda/src/services/delivery-observability.ts index c2861e28..8fd4cea5 100644 --- a/lambdas/https-client-lambda/src/services/delivery-observability.ts +++ b/lambdas/https-client-lambda/src/services/delivery-observability.ts @@ -1,11 +1,15 @@ -import { logger } from "services/logger"; +import { logger } from "@nhs-notify-client-callbacks/logger"; import { + emitAdmissionDenied, + emitCircuitBreakerClosed, emitCircuitBreakerOpen, emitDeliveryAttempt, + emitDeliveryDuration, emitDeliveryFailure, emitDeliveryPermanentFailure, emitDeliverySuccess, emitRateLimited, + emitRetryWindowExhausted, } from "services/delivery-metrics"; export function recordDeliveryAttempt( @@ -60,4 +64,37 @@ export function recordDeliveryFailure( export function recordCircuitBreakerOpen(targetId: string): void { emitCircuitBreakerOpen(targetId); + logger.warn("Circuit breaker opened", { targetId }); +} + +export function recordCircuitBreakerClosed(targetId: string): void { + 
emitCircuitBreakerClosed(targetId); + logger.info("Circuit breaker closed", { targetId }); +} + +export function recordRetryWindowExhausted( + clientId: string, + targetId: string, +): void { + emitRetryWindowExhausted(targetId); + logger.warn("Retry window exhausted — sending to DLQ", { + clientId, + targetId, + }); +} + +export function recordAdmissionDenied( + clientId: string, + targetId: string, + reason: string, +): void { + emitAdmissionDenied(targetId, reason); + logger.warn("Admission denied", { clientId, targetId, reason }); +} + +export function recordDeliveryDuration( + targetId: string, + durationMs: number, +): void { + emitDeliveryDuration(targetId, durationMs); } diff --git a/lambdas/https-client-lambda/src/services/delivery/https-client.ts b/lambdas/https-client-lambda/src/services/delivery/https-client.ts index 41397928..c651fe6d 100644 --- a/lambdas/https-client-lambda/src/services/delivery/https-client.ts +++ b/lambdas/https-client-lambda/src/services/delivery/https-client.ts @@ -4,15 +4,10 @@ import type { CallbackTarget } from "@nhs-notify-client-callbacks/models"; import { PERMANENT_TLS_ERROR_CODES } from "services/delivery/tls-agent-factory"; export type DeliveryResult = - | { ok: true } - | { ok: false; permanent: true } - | { - ok: false; - permanent: false; - statusCode: 429; - retryAfterHeader: string | undefined; - } - | { ok: false; permanent: false; statusCode: number }; + | { outcome: "success" } + | { outcome: "permanent_failure" } + | { outcome: "rate_limited"; retryAfterHeader: string | undefined } + | { outcome: "transient_failure"; statusCode: number }; export function deliverPayload( target: CallbackTarget, @@ -43,27 +38,25 @@ export function deliverPayload( const statusCode = res.statusCode ?? 
0; if (statusCode >= 200 && statusCode < 300) { - resolve({ ok: true }); + resolve({ outcome: "success" }); return; } if (statusCode === 429) { const retryAfterHeader = res.headers["retry-after"]; resolve({ - ok: false, - permanent: false, - statusCode: 429, + outcome: "rate_limited", retryAfterHeader, }); return; } if (statusCode >= 400 && statusCode < 500) { - resolve({ ok: false, permanent: true }); + resolve({ outcome: "permanent_failure" }); return; } - resolve({ ok: false, permanent: false, statusCode }); + resolve({ outcome: "transient_failure", statusCode }); }, ); @@ -73,11 +66,11 @@ export function deliverPayload( req.on("error", (error: NodeJS.ErrnoException) => { if (error.code && PERMANENT_TLS_ERROR_CODES.has(error.code)) { - resolve({ ok: false, permanent: true }); + resolve({ outcome: "permanent_failure" }); return; } - resolve({ ok: false, permanent: false, statusCode: 0 }); + resolve({ outcome: "transient_failure", statusCode: 0 }); }); req.end(signedPayloadJson); diff --git a/lambdas/https-client-lambda/src/services/delivery/retry-policy.ts b/lambdas/https-client-lambda/src/services/delivery/retry-policy.ts index 65a719f5..2bc7cd12 100644 --- a/lambdas/https-client-lambda/src/services/delivery/retry-policy.ts +++ b/lambdas/https-client-lambda/src/services/delivery/retry-policy.ts @@ -1,5 +1,5 @@ import type { SQSRecord } from "aws-lambda"; -import { logger } from "services/logger"; +import { logger } from "@nhs-notify-client-callbacks/logger"; import { sendToDlq } from "services/dlq-sender"; import { changeVisibility } from "services/sqs-visibility"; @@ -14,7 +14,7 @@ export function jitteredBackoffSeconds(receiveCount: number): number { BACKOFF_CAP_SECONDS, ); // eslint-disable-next-line sonarjs/pseudo-random -- jitter for backoff, not security-sensitive - return Math.floor(Math.random() * ceiling); + return Math.max(1, Math.floor(Math.random() * ceiling)); } export function parseRetryAfter(header: string): number { diff --git 
a/lambdas/https-client-lambda/src/services/delivery/tls-agent-factory.ts b/lambdas/https-client-lambda/src/services/delivery/tls-agent-factory.ts index 142ecd7a..e6c0fcfa 100644 --- a/lambdas/https-client-lambda/src/services/delivery/tls-agent-factory.ts +++ b/lambdas/https-client-lambda/src/services/delivery/tls-agent-factory.ts @@ -9,7 +9,7 @@ import { SecretsManagerClient, } from "@aws-sdk/client-secrets-manager"; import type { CallbackTarget } from "@nhs-notify-client-callbacks/models"; -import { logger } from "services/logger"; +import { logger } from "@nhs-notify-client-callbacks/logger"; const { MTLS_CERT_SECRET_ARN, @@ -18,7 +18,7 @@ const { MTLS_TEST_CERT_S3_KEY, } = process.env; const CERT_EXPIRY_THRESHOLD_MS = - Number(process.env.CERT_EXPIRY_THRESHOLD_MS) || 86_400_000; + Number(process.env.CERT_EXPIRY_THRESHOLD_MS) || 86_400_000; // 24 hours const s3Client = new S3Client({}); const secretsClient = new SecretsManagerClient({}); @@ -142,7 +142,15 @@ export async function buildAgent(target: CallbackTarget): Promise { keepAlive: false, }; - if (target.mtls.enabled) { + const certPinning = target.delivery?.mtls?.certPinning; + + if (certPinning?.enabled && !certPinning.spkiHash) { + throw new Error( + `certPinning.spkiHash is required when certPinning is enabled for target '${target.targetId}'`, + ); + } + + if (target.delivery?.mtls?.enabled) { const material = await getMaterial(); agentOptions.key = material.key; agentOptions.cert = material.cert; @@ -152,14 +160,8 @@ export async function buildAgent(target: CallbackTarget): Promise { } } - if (target.certPinning.enabled) { - const expectedHash = target.certPinning.spkiHash; - - if (!expectedHash) { - throw new Error( - `certPinning.spkiHash is required when certPinning is enabled for target '${target.targetId}'`, - ); - } + if (certPinning?.enabled) { + const expectedHash = certPinning.spkiHash!; /* eslint-disable sonarjs/function-return-type -- checkServerIdentity requires Error|undefined return */ 
agentOptions.checkServerIdentity = ( diff --git a/lambdas/https-client-lambda/src/services/endpoint-gate.ts b/lambdas/https-client-lambda/src/services/endpoint-gate.ts index 73721246..e54f9e69 100644 --- a/lambdas/https-client-lambda/src/services/endpoint-gate.ts +++ b/lambdas/https-client-lambda/src/services/endpoint-gate.ts @@ -1,6 +1,6 @@ import { type RedisClientType, createClient } from "@redis/client"; import { createHash } from "node:crypto"; -import { logger } from "services/logger"; +import { logger } from "@nhs-notify-client-callbacks/logger"; import admitLuaSrc from "services/admit.lua"; import recordResultLuaSrc from "services/record-result.lua"; @@ -41,6 +41,32 @@ function computeSha1(script: string): string { return createHash("sha1").update(script).digest("hex"); } +async function evalScript( + client: RedisClientType, + script: string, + sha: string, + keys: string[], + args: string[], +): Promise { + const keyCount = keys.length.toString(); + try { + return await client.sendCommand([ + "EVALSHA", + sha, + keyCount, + ...keys, + ...args, + ]); + } catch (error: unknown) { + const isNoScript = + error instanceof Error && error.message.includes("NOSCRIPT"); + if (!isNoScript) { + throw error; + } + return client.sendCommand(["EVAL", script, keyCount, ...keys, ...args]); + } +} + export async function admit( client: RedisClientType, targetId: string, @@ -63,36 +89,17 @@ export async function admit( ]; /* eslint-enable sonarjs/null-dereference */ - let result: string; - if (!admitSha) { admitSha = computeSha1(admitLuaSrc); } - try { - result = await client.sendCommand([ - "EVALSHA", - admitSha, - "2", - rlKey, - cbKey, - ...args, - ]); - } catch (error: unknown) { - const isNoScript = - error instanceof Error && error.message.includes("NOSCRIPT"); - if (!isNoScript) { - throw error; - } - result = await client.sendCommand([ - "EVAL", - admitLuaSrc, - "2", - rlKey, - cbKey, - ...args, - ]); - } + const result = await evalScript( + client, + admitLuaSrc, 
+ admitSha, + [rlKey, cbKey], + args, + ); return JSON.parse(result) as AdmitResult; } @@ -116,34 +123,17 @@ export async function recordResult( config.decayPeriodMs.toString(), ]; - let result: string; - if (!recordResultSha) { recordResultSha = computeSha1(recordResultLuaSrc); } - try { - result = await client.sendCommand([ - "EVALSHA", - recordResultSha, - "1", - cbKey, - ...args, - ]); - } catch (error: unknown) { - const isNoScript = - error instanceof Error && error.message.includes("NOSCRIPT"); - if (!isNoScript) { - throw error; - } - result = await client.sendCommand([ - "EVAL", - recordResultLuaSrc, - "1", - cbKey, - ...args, - ]); - } + const result = await evalScript( + client, + recordResultLuaSrc, + recordResultSha, + [cbKey], + args, + ); return JSON.parse(result) as RecordResultOutcome; } diff --git a/lambdas/https-client-lambda/src/services/logger.ts b/lambdas/https-client-lambda/src/services/logger.ts deleted file mode 100644 index 5c373b25..00000000 --- a/lambdas/https-client-lambda/src/services/logger.ts +++ /dev/null @@ -1 +0,0 @@ -export * from "@nhs-notify-client-callbacks/logger"; diff --git a/lambdas/https-client-lambda/src/services/ssm-applications-map.ts b/lambdas/https-client-lambda/src/services/ssm-applications-map.ts index 73f3ea61..999c23d9 100644 --- a/lambdas/https-client-lambda/src/services/ssm-applications-map.ts +++ b/lambdas/https-client-lambda/src/services/ssm-applications-map.ts @@ -1,12 +1,15 @@ import { GetParameterCommand, SSMClient } from "@aws-sdk/client-ssm"; -import { logger } from "services/logger"; +import { logger } from "@nhs-notify-client-callbacks/logger"; const ssmClient = new SSMClient({}); +const DEFAULT_CACHE_TTL_MS = 300_000; // 5 minutes + let cachedMap: Map | undefined; +let cacheExpiresAt = 0; async function loadMap(): Promise> { - if (cachedMap) { + if (cachedMap && Date.now() < cacheExpiresAt) { return cachedMap; } @@ -38,6 +41,9 @@ async function loadMap(): Promise> { } cachedMap = new 
Map(Object.entries(parsed)); + const ttlMs = + Number(process.env.APPLICATIONS_MAP_CACHE_TTL_MS) || DEFAULT_CACHE_TTL_MS; + cacheExpiresAt = Date.now() + ttlMs; logger.info("Applications map loaded from SSM", { parameterName: APPLICATIONS_MAP_PARAMETER, }); @@ -59,4 +65,5 @@ export async function getApplicationId(clientId: string): Promise { export function resetCache(): void { cachedMap = undefined; + cacheExpiresAt = 0; } diff --git a/lambdas/mock-webhook-lambda/src/index.ts b/lambdas/mock-webhook-lambda/src/index.ts index 5d68e279..414f66ac 100644 --- a/lambdas/mock-webhook-lambda/src/index.ts +++ b/lambdas/mock-webhook-lambda/src/index.ts @@ -74,6 +74,7 @@ async function buildResponse( const path = event.path ?? eventWithContextFields.rawPath; const isAlbInvocation = Boolean(eventWithContextFields.requestContext?.elb); + const clientCertPresent = Boolean(headers["x-amzn-mtls-clientcert"]); let isMtls = false; if (isAlbInvocation) { const certResult = verifyClientCertificate( @@ -88,6 +89,7 @@ async function buildResponse( } else { logger.info("Mock webhook invoked without mTLS", { isMtls: false, + clientCertPresent, reason: certResult.reason, }); } @@ -98,6 +100,7 @@ async function buildResponse( method: event.httpMethod, hasBody: Boolean(event.body), isMtls, + clientCertPresent, "x-api-key": headers["x-api-key"], "x-hmac-sha256-signature": headers["x-hmac-sha256-signature"], payload: event.body, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 3dde1cbc..c96b8ef9 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -279,9 +279,9 @@ importers: '@aws-sdk/client-s3': specifier: catalog:aws version: 3.1029.0 - '@nhs-notify-client-callbacks/config-cache': + '@nhs-notify-client-callbacks/config-subscription-cache': specifier: workspace:* - version: link:../../src/config-cache + version: link:../../src/config-subscription-cache '@nhs-notify-client-callbacks/logger': specifier: workspace:* version: link:../../src/logger @@ -340,9 +340,9 @@ importers: 
'@aws-sdk/client-ssm': specifier: catalog:aws version: 3.1029.0 - '@nhs-notify-client-callbacks/config-cache': + '@nhs-notify-client-callbacks/config-subscription-cache': specifier: workspace:* - version: link:../../src/config-cache + version: link:../../src/config-subscription-cache '@nhs-notify-client-callbacks/logger': specifier: workspace:* version: link:../../src/logger @@ -458,6 +458,40 @@ importers: specifier: catalog:tools version: 5.9.3 + src/config-subscription-cache: + dependencies: + '@aws-sdk/client-s3': + specifier: catalog:aws + version: 3.1029.0 + '@nhs-notify-client-callbacks/logger': + specifier: workspace:* + version: link:../logger + '@nhs-notify-client-callbacks/models': + specifier: workspace:* + version: link:../models + devDependencies: + '@tsconfig/node22': + specifier: catalog:tools + version: 22.0.5 + '@types/jest': + specifier: catalog:test + version: 30.0.0 + '@types/node': + specifier: catalog:tools + version: 24.12.0 + eslint: + specifier: catalog:lint + version: 9.39.4(jiti@2.6.1) + jest: + specifier: catalog:test + version: 30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)) + ts-jest: + specifier: catalog:test + version: 29.4.9(@babel/core@7.29.0)(@jest/transform@30.3.0)(@jest/types@30.3.0)(babel-jest@30.3.0(@babel/core@7.29.0))(esbuild@0.28.0)(jest-util@30.3.0)(jest@30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)))(typescript@5.9.3) + typescript: + specifier: catalog:tools + version: 5.9.3 + src/logger: dependencies: pino: diff --git a/src/config-subscription-cache/jest.config.ts b/src/config-subscription-cache/jest.config.ts new file mode 100644 index 00000000..6ecf333b --- /dev/null +++ b/src/config-subscription-cache/jest.config.ts @@ -0,0 +1,14 @@ +import { nodeJestConfig } from "../../jest.config.base.ts"; + +export default { + ...nodeJestConfig, + coverageThreshold: { + global: { + ...nodeJestConfig.coverageThreshold?.global, + branches: 100, + functions: 100, + 
lines: 100, + statements: 100, + }, + }, +}; diff --git a/src/config-subscription-cache/package.json b/src/config-subscription-cache/package.json new file mode 100644 index 00000000..c7bd0be5 --- /dev/null +++ b/src/config-subscription-cache/package.json @@ -0,0 +1,34 @@ +{ + "exports": { + ".": { + "types": "./src/index.ts", + "default": "./src/index.ts" + } + }, + "dependencies": { + "@aws-sdk/client-s3": "catalog:aws", + "@nhs-notify-client-callbacks/logger": "workspace:*", + "@nhs-notify-client-callbacks/models": "workspace:*" + }, + "devDependencies": { + "@tsconfig/node22": "catalog:tools", + "@types/jest": "catalog:test", + "@types/node": "catalog:tools", + "eslint": "catalog:lint", + "jest": "catalog:test", + "ts-jest": "catalog:test", + "typescript": "catalog:tools" + }, + "engines": { + "node": ">=24.14.1" + }, + "name": "@nhs-notify-client-callbacks/config-subscription-cache", + "private": true, + "scripts": { + "lint": "eslint .", + "lint:fix": "eslint . --fix", + "test:unit": "jest", + "typecheck": "tsc --noEmit" + }, + "version": "0.0.1" +} diff --git a/src/config-subscription-cache/src/__tests__/config-subscription-cache.test.ts b/src/config-subscription-cache/src/__tests__/config-subscription-cache.test.ts new file mode 100644 index 00000000..053b2398 --- /dev/null +++ b/src/config-subscription-cache/src/__tests__/config-subscription-cache.test.ts @@ -0,0 +1,157 @@ +import { GetObjectCommand, NoSuchKey } from "@aws-sdk/client-s3"; +import { ConfigSubscriptionCache } from "config-subscription-cache"; + +const mockS3Send = jest.fn(); +jest.mock("@aws-sdk/client-s3", () => { + const actual = jest.requireActual("@aws-sdk/client-s3"); + return { + ...actual, + S3Client: jest.fn().mockImplementation(() => ({ + send: (...args: unknown[]) => mockS3Send(...args), + })), + }; +}); + +jest.mock("@nhs-notify-client-callbacks/logger", () => ({ + logger: { + info: jest.fn(), + warn: jest.fn(), + error: jest.fn(), + debug: jest.fn(), + }, +})); + +const 
VALID_CONFIG = { + clientId: "client-1", + subscriptions: [], + targets: [ + { + targetId: "target-1", + type: "API", + invocationEndpoint: "https://webhook.example.invalid", + invocationMethod: "POST", + invocationRateLimit: 10, + apiKey: { headerName: "x-api-key", headerValue: "secret" }, + }, + ], +}; + +const makeS3Response = (body: unknown) => ({ + Body: { + transformToString: jest.fn().mockResolvedValue(JSON.stringify(body)), + }, +}); + +const createCache = (ttlMs = 1000) => { + const { S3Client } = jest.requireMock("@aws-sdk/client-s3"); + return new ConfigSubscriptionCache({ + s3Client: new S3Client(), + bucketName: "test-bucket", + keyPrefix: "client_subscriptions/", + ttlMs, + }); +}; + +describe("ConfigSubscriptionCache", () => { + beforeEach(() => { + mockS3Send.mockReset(); + }); + + it("loads and parses valid config from S3", async () => { + mockS3Send.mockResolvedValue(makeS3Response(VALID_CONFIG)); + const cache = createCache(); + + const result = await cache.loadClientConfig("client-1"); + + expect(result).toEqual(VALID_CONFIG); + expect(mockS3Send).toHaveBeenCalledTimes(1); + expect(mockS3Send.mock.calls[0][0]).toBeInstanceOf(GetObjectCommand); + }); + + it("uses the configured key prefix for S3 requests", async () => { + mockS3Send.mockResolvedValue(makeS3Response(VALID_CONFIG)); + const cache = createCache(); + + await cache.loadClientConfig("client-1"); + + const command: GetObjectCommand = mockS3Send.mock.calls[0][0]; + expect(command.input.Key).toBe("client_subscriptions/client-1.json"); + expect(command.input.Bucket).toBe("test-bucket"); + }); + + it("returns cached config on subsequent calls", async () => { + mockS3Send.mockResolvedValue(makeS3Response(VALID_CONFIG)); + const cache = createCache(); + + await cache.loadClientConfig("client-1"); + await cache.loadClientConfig("client-1"); + + expect(mockS3Send).toHaveBeenCalledTimes(1); + }); + + it("re-fetches from S3 after TTL expiry", async () => { + jest.useFakeTimers(); + 
jest.setSystemTime(new Date("2026-01-01T10:00:00Z")); + + mockS3Send.mockResolvedValue(makeS3Response(VALID_CONFIG)); + const cache = createCache(1000); + + await cache.loadClientConfig("client-1"); + + jest.advanceTimersByTime(1001); + + await cache.loadClientConfig("client-1"); + + expect(mockS3Send).toHaveBeenCalledTimes(2); + + jest.useRealTimers(); + }); + + it("returns undefined when S3 key does not exist", async () => { + mockS3Send.mockRejectedValue(new NoSuchKey({ $metadata: {}, message: "" })); + const cache = createCache(); + + const result = await cache.loadClientConfig("missing-client"); + + expect(result).toBeUndefined(); + }); + + it("throws when config fails validation", async () => { + const invalidConfig = { ...VALID_CONFIG, targets: [{ invalid: true }] }; + mockS3Send.mockResolvedValue(makeS3Response(invalidConfig)); + const cache = createCache(); + + await expect(cache.loadClientConfig("client-1")).rejects.toThrow( + "Invalid client config for 'client-1'", + ); + }); + + it("throws when S3 body is empty", async () => { + mockS3Send.mockResolvedValue({ Body: undefined }); + const cache = createCache(); + + await expect(cache.loadClientConfig("client-1")).rejects.toThrow( + "S3 response body was empty for client 'client-1'", + ); + }); + + it("propagates non-NoSuchKey S3 errors", async () => { + mockS3Send.mockRejectedValue(new Error("S3 access denied")); + const cache = createCache(); + + await expect(cache.loadClientConfig("client-1")).rejects.toThrow( + "S3 access denied", + ); + }); + + it("clears cache on reset", async () => { + mockS3Send.mockResolvedValue(makeS3Response(VALID_CONFIG)); + const cache = createCache(); + + await cache.loadClientConfig("client-1"); + cache.reset(); + await cache.loadClientConfig("client-1"); + + expect(mockS3Send).toHaveBeenCalledTimes(2); + }); +}); diff --git a/src/config-subscription-cache/src/config-subscription-cache.ts b/src/config-subscription-cache/src/config-subscription-cache.ts new file mode 100644 
index 00000000..0ce3547c --- /dev/null +++ b/src/config-subscription-cache/src/config-subscription-cache.ts @@ -0,0 +1,110 @@ +import { GetObjectCommand, NoSuchKey, S3Client } from "@aws-sdk/client-s3"; +import type { ClientSubscriptionConfiguration } from "@nhs-notify-client-callbacks/models"; +import { parseClientSubscriptionConfiguration } from "@nhs-notify-client-callbacks/models"; +import { logger } from "@nhs-notify-client-callbacks/logger"; + +type CacheEntry = { + value: ClientSubscriptionConfiguration; + expiresAt: number; +}; + +export type ConfigSubscriptionCacheOptions = { + s3Client: S3Client; + bucketName: string; + keyPrefix: string; + ttlMs: number; +}; + +export class ConfigSubscriptionCache { + private readonly cache = new Map(); + + private readonly s3Client: S3Client; + + private readonly bucketName: string; + + private readonly keyPrefix: string; + + private readonly ttlMs: number; + + constructor(options: ConfigSubscriptionCacheOptions) { + this.s3Client = options.s3Client; + this.bucketName = options.bucketName; + this.keyPrefix = options.keyPrefix; + this.ttlMs = options.ttlMs; + } + + async loadClientConfig( + clientId: string, + ): Promise { + const cached = this.getCached(clientId); + if (cached) { + return cached; + } + + const raw = await this.fetchFromS3(clientId); + if (raw === undefined) { + return undefined; + } + + const parsed = JSON.parse(raw) as unknown; + const result = parseClientSubscriptionConfiguration(parsed); + + if (!result.success) { + throw new Error( + `Invalid client config for '${clientId}': ${result.error.message}`, + ); + } + + this.cache.set(clientId, { + value: result.data, + expiresAt: Date.now() + this.ttlMs, + }); + + logger.info("Client config loaded from S3", { clientId }); + return result.data; + } + + reset(): void { + this.cache.clear(); + } + + // eslint-disable-next-line sonarjs/function-return-type -- cache lookup returns T | undefined + private getCached( + clientId: string, + ): 
ClientSubscriptionConfiguration | undefined { + const entry = this.cache.get(clientId); + + if (entry && entry.expiresAt <= Date.now()) { + this.cache.delete(clientId); + return undefined; + } + + return entry?.value; + } + + private async fetchFromS3(clientId: string): Promise { + try { + const response = await this.s3Client.send( + new GetObjectCommand({ + Bucket: this.bucketName, + Key: `${this.keyPrefix}${clientId}.json`, + }), + ); + + if (!response.Body) { + throw new Error(`S3 response body was empty for client '${clientId}'`); + } + + return await response.Body.transformToString(); + } catch (error) { + if (error instanceof NoSuchKey) { + logger.info( + "No config found in S3 for client — events will be filtered out", + { clientId }, + ); + return undefined; + } + throw error; + } + } +} diff --git a/src/config-subscription-cache/src/index.ts b/src/config-subscription-cache/src/index.ts new file mode 100644 index 00000000..39a4501b --- /dev/null +++ b/src/config-subscription-cache/src/index.ts @@ -0,0 +1,2 @@ +export { ConfigSubscriptionCache } from "./config-subscription-cache"; +export type { ConfigSubscriptionCacheOptions } from "./config-subscription-cache"; diff --git a/src/config-subscription-cache/tsconfig.json b/src/config-subscription-cache/tsconfig.json new file mode 100644 index 00000000..a50e6fc0 --- /dev/null +++ b/src/config-subscription-cache/tsconfig.json @@ -0,0 +1,14 @@ +{ + "compilerOptions": { + "isolatedModules": true, + "paths": { + "*": [ + "./src/*" + ] + } + }, + "extends": "../../tsconfig.base.json", + "include": [ + "src/**/*" + ] +} diff --git a/src/models/src/__tests__/client-config-schema.test.ts b/src/models/src/__tests__/client-config-schema.test.ts index d166c037..d9d68a40 100644 --- a/src/models/src/__tests__/client-config-schema.test.ts +++ b/src/models/src/__tests__/client-config-schema.test.ts @@ -47,8 +47,12 @@ const createValidConfig = (): ClientSubscriptionConfiguration => ({ invocationMethod: "POST", 
invocationRateLimit: 10, apiKey: { headerName: "x-api-key", headerValue: "secret" }, - mtls: { enabled: true }, - certPinning: { enabled: true, spkiHash: VALID_SPKI_HASH }, + delivery: { + mtls: { + enabled: true, + certPinning: { enabled: true, spkiHash: VALID_SPKI_HASH }, + }, + }, }, ], }); @@ -155,6 +159,7 @@ describe("parseClientSubscriptionConfiguration", () => { it("parses a valid config with mtls, certPinning, and delivery fields", () => { const config = createValidConfig(); config.targets[0].delivery = { + ...config.targets[0].delivery, maxRetryDurationSeconds: 7200, circuitBreaker: { enabled: true }, }; @@ -165,10 +170,11 @@ describe("parseClientSubscriptionConfiguration", () => { }); }); - it("returns a failed parse result when mtls field is missing", () => { + it("returns a failed parse result when delivery.mtls has invalid shape", () => { const config = createValidConfig(); - const target = config.targets[0] as Record; - delete target.mtls; + (config.targets[0] as Record).delivery = { + mtls: { enabled: "not-a-boolean" }, + }; const result = expectFailedParse( parseClientSubscriptionConfiguration(config), @@ -177,7 +183,7 @@ describe("parseClientSubscriptionConfiguration", () => { expect(result.error.issues).toEqual( expect.arrayContaining([ expect.objectContaining({ - path: expect.arrayContaining(["targets", 0, "mtls"]), + path: expect.arrayContaining(["targets", 0, "delivery"]), }), ]), ); @@ -185,7 +191,10 @@ describe("parseClientSubscriptionConfiguration", () => { it("returns a failed parse result when spkiHash has an invalid pattern", () => { const config = createValidConfig(); - config.targets[0].certPinning.spkiHash = "not-a-valid-hash"; + config.targets[0].delivery!.mtls!.certPinning = { + enabled: true, + spkiHash: "not-a-valid-hash", + }; const result = expectFailedParse( parseClientSubscriptionConfiguration(config), @@ -202,7 +211,7 @@ describe("parseClientSubscriptionConfiguration", () => { it("returns a failed parse result when 
certPinning.enabled is true without spkiHash", () => { const config = createValidConfig(); - config.targets[0].certPinning = { enabled: true }; + config.targets[0].delivery!.mtls!.certPinning = { enabled: true }; const result = expectFailedParse( parseClientSubscriptionConfiguration(config), diff --git a/src/models/src/client-config-schema.ts b/src/models/src/client-config-schema.ts index 5da8479d..cdc941f1 100644 --- a/src/models/src/client-config-schema.ts +++ b/src/models/src/client-config-schema.ts @@ -46,10 +46,6 @@ const targetSchema = z.object({ headerName: z.string(), headerValue: z.string(), }), - mtls: z.object({ - enabled: z.boolean(), - }), - certPinning: certPinningSchema, delivery: z .object({ maxRetryDurationSeconds: z.number().min(60).max(43_200).optional(), @@ -58,6 +54,12 @@ const targetSchema = z.object({ enabled: z.boolean(), }) .optional(), + mtls: z + .object({ + enabled: z.boolean(), + certPinning: certPinningSchema.optional(), + }) + .optional(), }) .optional(), }); diff --git a/src/models/src/client-config.ts b/src/models/src/client-config.ts index 9b434596..4d1796d1 100644 --- a/src/models/src/client-config.ts +++ b/src/models/src/client-config.ts @@ -15,18 +15,18 @@ export type CallbackTarget = { headerName: string; headerValue: string; }; - mtls: { - enabled: boolean; - }; - certPinning: { - enabled: boolean; - spkiHash?: string; - }; delivery?: { maxRetryDurationSeconds?: number; circuitBreaker?: { enabled: boolean; }; + mtls?: { + enabled: boolean; + certPinning?: { + enabled: boolean; + spkiHash?: string; + }; + }; }; }; diff --git a/tools/client-subscriptions-management/src/__tests__/domain/client-subscription-builder.test.ts b/tools/client-subscriptions-management/src/__tests__/domain/client-subscription-builder.test.ts index 3b535fbd..0ec7c4fc 100644 --- a/tools/client-subscriptions-management/src/__tests__/domain/client-subscription-builder.test.ts +++ 
b/tools/client-subscriptions-management/src/__tests__/domain/client-subscription-builder.test.ts @@ -32,8 +32,9 @@ describe("buildTarget", () => { invocationMethod: "POST", invocationRateLimit: 10, apiKey: { headerName: "x-api-key", headerValue: "secret" }, - mtls: { enabled: false }, - certPinning: { enabled: false }, + delivery: { + mtls: { enabled: false, certPinning: { enabled: false } }, + }, }); expect(result.targetId).toMatch(UUID_REGEX); }); @@ -75,18 +76,16 @@ describe("buildTarget", () => { ); }); - it("emits warning when certPinning enabled without spkiHash", () => { - buildTarget({ - apiEndpoint: "https://example.com/webhook", - apiKey: "secret", - rateLimit: 10, - mtls: { enabled: true }, - certPinning: { enabled: true }, - }); - - expect(warnSpy).toHaveBeenCalledWith( - expect.stringContaining("no SPKI hash is stored"), - ); + it("throws when certPinning enabled without spkiHash", () => { + expect(() => + buildTarget({ + apiEndpoint: "https://example.com/webhook", + apiKey: "secret", + rateLimit: 10, + mtls: { enabled: true }, + certPinning: { enabled: true }, + }), + ).toThrow("Certificate pinning cannot be enabled without an SPKI hash"); }); it("emits warning when certPinning enabled but mtls disabled", () => { @@ -95,7 +94,10 @@ describe("buildTarget", () => { apiKey: "secret", rateLimit: 10, mtls: { enabled: false }, - certPinning: { enabled: true }, + certPinning: { + enabled: true, + spkiHash: "dGVzdGhhc2g9PT09PT09PT09PT09PT09PT09PT09PQ==", + }, }); expect(warnSpy).toHaveBeenCalledWith( @@ -115,7 +117,6 @@ describe("buildTarget", () => { expect(warnSpy).not.toHaveBeenCalled(); }); }); - describe("buildMessageStatusSubscription", () => { it("builds message status subscription", () => { const result = buildMessageStatusSubscription({ diff --git a/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/targets-set-certificate.test.ts b/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/targets-set-certificate.test.ts 
index 9deb7065..a902ed0c 100644 --- a/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/targets-set-certificate.test.ts +++ b/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/targets-set-certificate.test.ts @@ -74,8 +74,12 @@ describe("targets-set-certificate CLI", () => { expect.objectContaining({ targets: [ expect.objectContaining({ - certPinning: expect.objectContaining({ - spkiHash: EXPECTED_SPKI_HASH, + delivery: expect.objectContaining({ + mtls: expect.objectContaining({ + certPinning: expect.objectContaining({ + spkiHash: EXPECTED_SPKI_HASH, + }), + }), }), }), ], diff --git a/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/targets-set-mtls.test.ts b/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/targets-set-mtls.test.ts index 0703f9d3..a0e53092 100644 --- a/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/targets-set-mtls.test.ts +++ b/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/targets-set-mtls.test.ts @@ -67,14 +67,20 @@ describe("targets-set-mtls CLI", () => { expect(mockPutClientConfig).toHaveBeenCalledWith( "client-1", expect.objectContaining({ - targets: [expect.objectContaining({ mtls: { enabled: true } })], + targets: [ + expect.objectContaining({ + delivery: expect.objectContaining({ + mtls: expect.objectContaining({ enabled: true }), + }), + }), + ], }), false, ); }); - it("disables mTLS with --disable flag and emits ANSI warning", async () => { - await cli.main([...baseArgs, "--disable"]); + it("disables mTLS with --no-enable flag and emits ANSI warning", async () => { + await cli.main([...baseArgs, "--no-enable"]); expect(console.warn).toHaveBeenCalledWith( expect.stringContaining("Disabling mTLS"), @@ -82,7 +88,13 @@ describe("targets-set-mtls CLI", () => { expect(mockPutClientConfig).toHaveBeenCalledWith( "client-1", expect.objectContaining({ - targets: [expect.objectContaining({ mtls: { enabled: false } })], + targets: [ + 
expect.objectContaining({ + delivery: expect.objectContaining({ + mtls: expect.objectContaining({ enabled: false }), + }), + }), + ], }), false, ); diff --git a/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/targets-set-pinning.test.ts b/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/targets-set-pinning.test.ts index f2cfd9ea..051dcbdd 100644 --- a/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/targets-set-pinning.test.ts +++ b/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/targets-set-pinning.test.ts @@ -25,7 +25,12 @@ jest.mock("src/format", () => ({ })); const target = createTarget({ - certPinning: { enabled: true, spkiHash: "existing-hash" }, + delivery: { + mtls: { + enabled: true, + certPinning: { enabled: true, spkiHash: "existing-hash" }, + }, + }, }); const config = createClientSubscriptionConfig({ targets: [target] }); const mockCreateRepository = getMockCreateRepository(); @@ -50,7 +55,12 @@ describe("targets-set-pinning CLI", () => { createClientSubscriptionConfig({ targets: [ createTarget({ - certPinning: { enabled: true, spkiHash: "existing-hash" }, + delivery: { + mtls: { + enabled: true, + certPinning: { enabled: true, spkiHash: "existing-hash" }, + }, + }, }), ], }), @@ -79,7 +89,11 @@ describe("targets-set-pinning CLI", () => { expect.objectContaining({ targets: [ expect.objectContaining({ - certPinning: { enabled: true, spkiHash: "existing-hash" }, + delivery: expect.objectContaining({ + mtls: expect.objectContaining({ + certPinning: { enabled: true, spkiHash: "existing-hash" }, + }), + }), }), ], }), @@ -87,8 +101,8 @@ describe("targets-set-pinning CLI", () => { ); }); - it("disables pinning with --disable flag and emits ANSI warning", async () => { - await cli.main([...baseArgs, "--disable"]); + it("disables pinning with --no-enable flag and emits ANSI warning", async () => { + await cli.main([...baseArgs, "--no-enable"]); expect(console.warn).toHaveBeenCalledWith( 
expect.stringContaining("Disabling certificate pinning"), @@ -98,7 +112,11 @@ describe("targets-set-pinning CLI", () => { expect.objectContaining({ targets: [ expect.objectContaining({ - certPinning: { enabled: false, spkiHash: "existing-hash" }, + delivery: expect.objectContaining({ + mtls: expect.objectContaining({ + certPinning: { enabled: false, spkiHash: "existing-hash" }, + }), + }), }), ], }), @@ -107,11 +125,13 @@ describe("targets-set-pinning CLI", () => { }); it("preserves existing spkiHash when disabling", async () => { - await cli.main([...baseArgs, "--disable"]); + await cli.main([...baseArgs, "--no-enable"]); const putCall = mockPutClientConfig.mock.calls[0]; const updatedTarget = putCall[1].targets[0]; - expect(updatedTarget.certPinning.spkiHash).toBe("existing-hash"); + expect(updatedTarget.delivery.mtls.certPinning.spkiHash).toBe( + "existing-hash", + ); }); it("passes dry-run to putClientConfig", async () => { @@ -137,7 +157,12 @@ describe("targets-set-pinning CLI", () => { createClientSubscriptionConfig({ targets: [ createTarget({ - certPinning: { enabled: false }, + delivery: { + mtls: { + enabled: true, + certPinning: { enabled: false }, + }, + }, }), ], }), diff --git a/tools/client-subscriptions-management/src/__tests__/helpers/client-subscription-fixtures.ts b/tools/client-subscriptions-management/src/__tests__/helpers/client-subscription-fixtures.ts index ee41bd09..de12586e 100644 --- a/tools/client-subscriptions-management/src/__tests__/helpers/client-subscription-fixtures.ts +++ b/tools/client-subscriptions-management/src/__tests__/helpers/client-subscription-fixtures.ts @@ -24,8 +24,6 @@ export const createTarget = ( headerValue: "secret", ...overrides.apiKey, }, - mtls: { enabled: false }, - certPinning: { enabled: false }, ...overrides, }); diff --git a/tools/client-subscriptions-management/src/domain/client-subscription-builder.ts b/tools/client-subscriptions-management/src/domain/client-subscription-builder.ts index 
43edbc5a..288c0d72 100644 --- a/tools/client-subscriptions-management/src/domain/client-subscription-builder.ts +++ b/tools/client-subscriptions-management/src/domain/client-subscription-builder.ts @@ -47,7 +47,9 @@ export function buildTarget(args: BuildTargetArgs): CallbackTarget { } if (certPinning.enabled && !certPinning.spkiHash) { - warnings.push("Certificate pinning is enabled but no SPKI hash is stored"); + throw new Error( + "Certificate pinning cannot be enabled without an SPKI hash. Run 'targets-set-certificate' first.", + ); } if (!mtls.enabled && certPinning.enabled) { @@ -68,8 +70,12 @@ export function buildTarget(args: BuildTargetArgs): CallbackTarget { headerName: args.apiKeyHeaderName ?? "x-api-key", headerValue: args.apiKey, }, - mtls, - certPinning, + delivery: { + mtls: { + ...mtls, + certPinning, + }, + }, }; } diff --git a/tools/client-subscriptions-management/src/entrypoint/cli/clients-put.ts b/tools/client-subscriptions-management/src/entrypoint/cli/clients-put.ts index 2e5dc849..ce3d1cca 100644 --- a/tools/client-subscriptions-management/src/entrypoint/cli/clients-put.ts +++ b/tools/client-subscriptions-management/src/entrypoint/cli/clients-put.ts @@ -56,7 +56,7 @@ export const handler: CliCommand["handler"] = async (argv) => { return; } - // eslint-disable-next-line security/detect-non-literal-fs-filename + // eslint-disable-next-line security/detect-non-literal-fs-filename -- path is provided directly by the operator via CLI arg const rawJson = argv.json ?? 
readFileSync(argv.file!, "utf8"); let parsed: unknown; diff --git a/tools/client-subscriptions-management/src/entrypoint/cli/helper.ts b/tools/client-subscriptions-management/src/entrypoint/cli/helper.ts index 14e998dd..23070926 100644 --- a/tools/client-subscriptions-management/src/entrypoint/cli/helper.ts +++ b/tools/client-subscriptions-management/src/entrypoint/cli/helper.ts @@ -1,3 +1,7 @@ +import type { + CallbackTarget, + ClientSubscriptionConfiguration, +} from "@nhs-notify-client-callbacks/models"; import { createRepository as createRepositoryFromOptions, createSsmApplicationsMapRepository as createSsmApplicationsMapRepositoryFromOptions, @@ -124,6 +128,14 @@ export const clientIdOption = { }, }; +export const targetIdOption = { + "target-id": { + type: "string" as const, + demandOption: true as const, + description: "Target identifier", + }, +}; + export const writeOptions = { "dry-run": { type: "boolean" as const, @@ -159,3 +171,30 @@ export const createSsmApplicationsMapRepository = (argv: SsmCliArgs) => { profile, }); }; + +export async function requireClientConfig( + repository: { + getClientConfig: ( + clientId: string, + ) => Promise; + }, + clientId: string, +): Promise { + const config = await repository.getClientConfig(clientId); + if (!config) { + throw new Error(`No configuration found for client: ${clientId}`); + } + return config; +} + +export function requireTargetConfig( + config: ClientSubscriptionConfiguration, + clientId: string, + targetId: string, +): CallbackTarget { + const target = config.targets.find((t) => t.targetId === targetId); + if (!target) { + throw new Error(`Target '${targetId}' not found for client '${clientId}'`); + } + return target; +} diff --git a/tools/client-subscriptions-management/src/entrypoint/cli/targets-del.ts b/tools/client-subscriptions-management/src/entrypoint/cli/targets-del.ts index 6fe56ac2..7f2c3e19 100644 --- a/tools/client-subscriptions-management/src/entrypoint/cli/targets-del.ts +++ 
b/tools/client-subscriptions-management/src/entrypoint/cli/targets-del.ts @@ -7,6 +7,7 @@ import { commonOptions, createRepository, runCommand, + targetIdOption, writeOptions, } from "src/entrypoint/cli/helper"; import { formatClientConfig } from "src/format"; @@ -20,12 +21,8 @@ export const builder = (yargs: Argv) => yargs.options({ ...commonOptions, ...clientIdOption, + ...targetIdOption, ...writeOptions, - "target-id": { - type: "string", - demandOption: true, - description: "Target identifier to delete", - }, }); export const handler: CliCommand["handler"] = async (argv) => { diff --git a/tools/client-subscriptions-management/src/entrypoint/cli/targets-set-certificate.ts b/tools/client-subscriptions-management/src/entrypoint/cli/targets-set-certificate.ts index ea32f8b9..857d2991 100644 --- a/tools/client-subscriptions-management/src/entrypoint/cli/targets-set-certificate.ts +++ b/tools/client-subscriptions-management/src/entrypoint/cli/targets-set-certificate.ts @@ -8,7 +8,10 @@ import { clientIdOption, commonOptions, createRepository, + requireClientConfig, + requireTargetConfig, runCommand, + targetIdOption, writeOptions, } from "src/entrypoint/cli/helper"; import { formatClientConfig } from "src/format"; @@ -23,12 +26,8 @@ export const builder = (yargs: Argv) => yargs.options({ ...commonOptions, ...clientIdOption, + ...targetIdOption, ...writeOptions, - "target-id": { - type: "string", - demandOption: true, - description: "Target identifier to update", - }, "pem-file": { type: "string", demandOption: true, @@ -54,23 +53,24 @@ export const handler: CliCommand["handler"] = async ( console.log(`Extracted SPKI hash: ${spkiHash}`); const repository = await createRepository(argv); - const config = await repository.getClientConfig(argv["client-id"]); - - if (!config) { - throw new Error(`No configuration found for client: ${argv["client-id"]}`); - } - - const target = config.targets.find((t) => t.targetId === argv["target-id"]); - - if (!target) { - throw new 
Error( - `Target '${argv["target-id"]}' not found for client '${argv["client-id"]}'`, - ); - } + const config = await requireClientConfig(repository, argv["client-id"]); + const target = requireTargetConfig( + config, + argv["client-id"], + argv["target-id"], + ); - target.certPinning = { - ...target.certPinning, - spkiHash, + const mtls = target.delivery?.mtls ?? { enabled: false }; + const certPinning = mtls.certPinning ?? { enabled: false }; + target.delivery = { + ...target.delivery, + mtls: { + ...mtls, + certPinning: { + ...certPinning, + spkiHash, + }, + }, }; const result = await repository.putClientConfig( diff --git a/tools/client-subscriptions-management/src/entrypoint/cli/targets-set-mtls.ts b/tools/client-subscriptions-management/src/entrypoint/cli/targets-set-mtls.ts index b143b3d8..ae9127ef 100644 --- a/tools/client-subscriptions-management/src/entrypoint/cli/targets-set-mtls.ts +++ b/tools/client-subscriptions-management/src/entrypoint/cli/targets-set-mtls.ts @@ -7,7 +7,10 @@ import { clientIdOption, commonOptions, createRepository, + requireClientConfig, + requireTargetConfig, runCommand, + targetIdOption, writeOptions, } from "src/entrypoint/cli/helper"; import { formatClientConfig } from "src/format"; @@ -15,43 +18,27 @@ import { formatClientConfig } from "src/format"; type TargetsSetMtlsArgs = ClientCliArgs & WriteCliArgs & { "target-id": string; - enable?: boolean; - disable?: boolean; + enable: boolean; }; export const builder = (yargs: Argv) => - yargs - .options({ - ...commonOptions, - ...clientIdOption, - ...writeOptions, - "target-id": { - type: "string", - demandOption: true, - description: "Target identifier to update", - }, - enable: { - type: "boolean", - description: "Enable mTLS for this target", - conflicts: "disable", - }, - disable: { - type: "boolean", - description: "Disable mTLS for this target", - conflicts: "enable", - }, - }) - .check((argv) => { - if (!argv.enable && !argv.disable) { - throw new Error("Specify either 
--enable or --disable"); - } - return true; - }); + yargs.options({ + ...commonOptions, + ...clientIdOption, + ...targetIdOption, + ...writeOptions, + enable: { + type: "boolean", + demandOption: true, + description: + "Enable or disable mTLS for this target (use --no-enable to disable)", + }, + }); export const handler: CliCommand["handler"] = async ( argv, ) => { - const enabled = argv.enable === true; + const enabled = argv.enable; if (!enabled) { console.warn( @@ -62,21 +49,20 @@ export const handler: CliCommand["handler"] = async ( } const repository = await createRepository(argv); - const config = await repository.getClientConfig(argv["client-id"]); - - if (!config) { - throw new Error(`No configuration found for client: ${argv["client-id"]}`); - } - - const target = config.targets.find((t) => t.targetId === argv["target-id"]); - - if (!target) { - throw new Error( - `Target '${argv["target-id"]}' not found for client '${argv["client-id"]}'`, - ); - } + const config = await requireClientConfig(repository, argv["client-id"]); + const target = requireTargetConfig( + config, + argv["client-id"], + argv["target-id"], + ); - target.mtls = { enabled }; + target.delivery = { + ...target.delivery, + mtls: { + ...target.delivery?.mtls, + enabled, + }, + }; const result = await repository.putClientConfig( argv["client-id"], diff --git a/tools/client-subscriptions-management/src/entrypoint/cli/targets-set-pinning.ts b/tools/client-subscriptions-management/src/entrypoint/cli/targets-set-pinning.ts index 1e6a6c85..6816d85f 100644 --- a/tools/client-subscriptions-management/src/entrypoint/cli/targets-set-pinning.ts +++ b/tools/client-subscriptions-management/src/entrypoint/cli/targets-set-pinning.ts @@ -7,7 +7,10 @@ import { clientIdOption, commonOptions, createRepository, + requireClientConfig, + requireTargetConfig, runCommand, + targetIdOption, writeOptions, } from "src/entrypoint/cli/helper"; import { formatClientConfig } from "src/format"; @@ -15,72 +18,57 @@ import { 
formatClientConfig } from "src/format"; type TargetsSetPinningArgs = ClientCliArgs & WriteCliArgs & { "target-id": string; - enable?: boolean; - disable?: boolean; + enable: boolean; }; export const builder = (yargs: Argv) => - yargs - .options({ - ...commonOptions, - ...clientIdOption, - ...writeOptions, - "target-id": { - type: "string", - demandOption: true, - description: "Target identifier to update", - }, - enable: { - type: "boolean", - description: "Enable certificate pinning for this target", - conflicts: "disable", - }, - disable: { - type: "boolean", - description: "Disable certificate pinning for this target", - conflicts: "enable", - }, - }) - .check((argv) => { - if (!argv.enable && !argv.disable) { - throw new Error("Specify either --enable or --disable"); - } - return true; - }); + yargs.options({ + ...commonOptions, + ...clientIdOption, + ...targetIdOption, + ...writeOptions, + enable: { + type: "boolean", + demandOption: true, + description: + "Enable or disable certificate pinning for this target (use --no-enable to disable)", + }, + }); export const handler: CliCommand["handler"] = async ( argv, ) => { - const enabled = argv.enable === true; + const enabled = argv.enable; if (!enabled) { console.warn(pc.bold(pc.red("WARNING: Disabling certificate pinning"))); } const repository = await createRepository(argv); - const config = await repository.getClientConfig(argv["client-id"]); - - if (!config) { - throw new Error(`No configuration found for client: ${argv["client-id"]}`); - } - - const target = config.targets.find((t) => t.targetId === argv["target-id"]); - - if (!target) { - throw new Error( - `Target '${argv["target-id"]}' not found for client '${argv["client-id"]}'`, - ); - } + const config = await requireClientConfig(repository, argv["client-id"]); + const target = requireTargetConfig( + config, + argv["client-id"], + argv["target-id"], + ); - if (enabled && !target.certPinning.spkiHash) { + if (enabled && 
!target.delivery?.mtls?.certPinning?.spkiHash) { throw new Error( `Target '${argv["target-id"]}' has no SPKI hash stored. Run 'targets-set-certificate' first to configure a certificate hash before enabling pinning.`, ); } - target.certPinning = { - ...target.certPinning, - enabled, + const mtls = target.delivery?.mtls ?? { enabled: false }; + const certPinning = mtls.certPinning ?? { enabled: false }; + target.delivery = { + ...target.delivery, + mtls: { + ...mtls, + certPinning: { + ...certPinning, + enabled, + }, + }, }; const result = await repository.putClientConfig( From f6c053246f1a65ae6ae9a82397930070911b677a Mon Sep 17 00:00:00 2001 From: Rhys Cox Date: Mon, 20 Apr 2026 11:00:22 +0100 Subject: [PATCH 11/65] CCM-16073 - PR feedback --- lambdas/https-client-lambda/package.json | 3 + .../src/__tests__/endpoint-gate.test.ts | 127 ++++---- .../src/__tests__/handler.test.ts | 21 ++ lambdas/https-client-lambda/src/handler.ts | 4 +- .../src/services/admit.lua | 270 ++++++++++++------ .../src/services/config-loader.ts | 3 +- .../src/services/endpoint-gate.ts | 129 +++++++-- .../src/services/record-result.lua | 210 +++++++++----- pnpm-lock.yaml | 15 + pnpm-workspace.yaml | 2 + 10 files changed, 546 insertions(+), 238 deletions(-) diff --git a/lambdas/https-client-lambda/package.json b/lambdas/https-client-lambda/package.json index f03e3485..d878392f 100644 --- a/lambdas/https-client-lambda/package.json +++ b/lambdas/https-client-lambda/package.json @@ -1,9 +1,12 @@ { "dependencies": { + "@aws-crypto/sha256-js": "catalog:aws", "@aws-sdk/client-s3": "catalog:aws", "@aws-sdk/client-secrets-manager": "catalog:aws", "@aws-sdk/client-sqs": "catalog:aws", "@aws-sdk/client-ssm": "catalog:aws", + "@aws-sdk/credential-providers": "catalog:aws", + "@smithy/signature-v4": "catalog:aws", "@nhs-notify-client-callbacks/config-subscription-cache": "workspace:*", "@nhs-notify-client-callbacks/logger": "workspace:*", "@nhs-notify-client-callbacks/models": "workspace:*", diff --git 
a/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts b/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts index cc42a311..620d77be 100644 --- a/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts @@ -9,14 +9,30 @@ import { jest.mock("@nhs-notify-client-callbacks/logger"); +const mockPresign = jest.fn().mockResolvedValue({ + hostname: "cache.example.invalid", + path: "/", + query: { "X-Amz-Signature": "mock-sig" }, +}); + +jest.mock("@smithy/signature-v4", () => ({ + SignatureV4: jest.fn().mockImplementation(() => ({ presign: mockPresign })), +})); + +jest.mock("@aws-sdk/credential-providers", () => ({ + fromNodeProviderChain: jest.fn(), +})); + const mockSendCommand = jest.fn(); const mockConnect = jest.fn().mockResolvedValue(undefined); +const mockDisconnect = jest.fn().mockResolvedValue(undefined); const mockOn = jest.fn(); jest.mock("@redis/client", () => ({ createClient: jest.fn(() => ({ sendCommand: mockSendCommand, connect: mockConnect, + disconnect: mockDisconnect, on: mockOn, isOpen: true, })), @@ -35,6 +51,7 @@ const defaultConfig: EndpointGateConfig = { const mockRedis = { sendCommand: mockSendCommand, connect: mockConnect, + disconnect: mockDisconnect, on: mockOn, isOpen: true, } as never; @@ -46,9 +63,7 @@ beforeEach(() => { describe("admit", () => { it("returns allowed when tokens available", async () => { - mockSendCommand.mockResolvedValueOnce( - JSON.stringify({ allowed: true, probe: false, effectiveRate: 10 }), - ); + mockSendCommand.mockResolvedValueOnce([1, "allowed", 0, 10]); const result = await admit(mockRedis, "target-1", 10, true, defaultConfig); @@ -59,29 +74,20 @@ describe("admit", () => { }); it("returns rate_limited when tokens exhausted", async () => { - mockSendCommand.mockResolvedValueOnce( - JSON.stringify({ - allowed: false, - reason: "rate_limited", - retryAfterMs: 500, - effectiveRate: 10, - }), - ); + 
mockSendCommand.mockResolvedValueOnce([0, "rate_limited", 1000, 10]); const result = await admit(mockRedis, "target-1", 10, false, defaultConfig); expect(result).toEqual({ allowed: false, reason: "rate_limited", - retryAfterMs: 500, + retryAfterMs: 1000, effectiveRate: 10, }); }); - it("returns circuit_open with probe slot available", async () => { - mockSendCommand.mockResolvedValueOnce( - JSON.stringify({ allowed: true, probe: true, effectiveRate: 0 }), - ); + it("returns allowed with probe flag when circuit is open but probe slot is available", async () => { + mockSendCommand.mockResolvedValueOnce([1, "probe", 0, 0]); const result = await admit(mockRedis, "target-1", 10, true, defaultConfig); @@ -89,14 +95,7 @@ describe("admit", () => { }); it("returns circuit_open without probe slot", async () => { - mockSendCommand.mockResolvedValueOnce( - JSON.stringify({ - allowed: false, - reason: "circuit_open", - retryAfterMs: 30_000, - effectiveRate: 0, - }), - ); + mockSendCommand.mockResolvedValueOnce([0, "circuit_open", 30_000, 0]); const result = await admit(mockRedis, "target-1", 10, true, defaultConfig); @@ -111,9 +110,7 @@ describe("admit", () => { it("falls back to EVAL on NOSCRIPT error", async () => { mockSendCommand .mockRejectedValueOnce(new Error("NOSCRIPT No matching script")) - .mockResolvedValueOnce( - JSON.stringify({ allowed: true, probe: false, effectiveRate: 10 }), - ); + .mockResolvedValueOnce([1, "allowed", 0, 10]); const result = await admit(mockRedis, "target-1", 10, true, defaultConfig); @@ -137,36 +134,31 @@ describe("admit", () => { ).rejects.toThrow("Connection refused"); }); - it("passes cbEnabled=0 when circuit breaker is disabled", async () => { - mockSendCommand.mockResolvedValueOnce( - JSON.stringify({ allowed: true, probe: false, effectiveRate: 10 }), - ); + it("passes cbProbeIntervalMs=0 when circuit breaker is disabled", async () => { + mockSendCommand.mockResolvedValueOnce([1, "allowed", 0, 10]); await admit(mockRedis, "target-1", 10, 
false, defaultConfig); + // EVALSHA layout: [EVALSHA, sha, keyCount, cbKey, rlKey, now, capacity, refillPerSec, cooldownMs, decayPeriodMs, cbWindowPeriodMs, cbProbeIntervalMs] const args = mockSendCommand.mock.calls[0]![0] as string[]; - const cbEnabledArg = args[9]; - expect(cbEnabledArg).toBe("0"); + const cbProbeIntervalArg = args[11]; + expect(cbProbeIntervalArg).toBe("0"); }); - it("passes correct keys for target-specific hashes", async () => { - mockSendCommand.mockResolvedValueOnce( - JSON.stringify({ allowed: true, probe: false, effectiveRate: 5 }), - ); + it("passes cbKey first, rlKey second", async () => { + mockSendCommand.mockResolvedValueOnce([1, "allowed", 0, 5]); await admit(mockRedis, "my-target", 5, true, defaultConfig); const args = mockSendCommand.mock.calls[0]![0] as string[]; - expect(args[3]).toBe("rl:my-target"); - expect(args[4]).toBe("cb:my-target"); + expect(args[3]).toBe("cb:my-target"); + expect(args[4]).toBe("rl:my-target"); }); }); describe("recordResult", () => { it("returns closed on success below threshold", async () => { - mockSendCommand.mockResolvedValueOnce( - JSON.stringify({ ok: true, state: "closed" }), - ); + mockSendCommand.mockResolvedValueOnce([1, "closed"]); const result = await recordResult( mockRedis, @@ -182,9 +174,7 @@ describe("recordResult", () => { }); it("returns opened when failure crosses threshold", async () => { - mockSendCommand.mockResolvedValueOnce( - JSON.stringify({ ok: false, state: "opened" }), - ); + mockSendCommand.mockResolvedValueOnce([0, "opened"]); const result = await recordResult( mockRedis, @@ -196,10 +186,23 @@ describe("recordResult", () => { expect(result).toEqual({ ok: false, state: "opened" }); }); + it("returns failed when failure is below threshold", async () => { + mockSendCommand.mockResolvedValueOnce([0, "failed"]); + + const result = await recordResult( + mockRedis, + "target-1", + false, + defaultConfig, + ); + + expect(result).toEqual({ ok: false, state: "failed" }); + }); + 
it("falls back to EVAL on NOSCRIPT error", async () => { mockSendCommand .mockRejectedValueOnce(new Error("NOSCRIPT No matching script")) - .mockResolvedValueOnce(JSON.stringify({ ok: true, state: "closed" })); + .mockResolvedValueOnce([1, "closed"]); const result = await recordResult( mockRedis, @@ -221,9 +224,7 @@ describe("recordResult", () => { }); it("passes correct cb key for target", async () => { - mockSendCommand.mockResolvedValueOnce( - JSON.stringify({ ok: true, state: "closed" }), - ); + mockSendCommand.mockResolvedValueOnce([1, "closed"]); await recordResult(mockRedis, "my-target", true, defaultConfig); @@ -236,6 +237,8 @@ describe("getRedisClient", () => { beforeEach(() => { resetRedisClient(); delete process.env.ELASTICACHE_ENDPOINT; + delete process.env.ELASTICACHE_CACHE_NAME; + delete process.env.ELASTICACHE_IAM_USERNAME; }); it("throws when ELASTICACHE_ENDPOINT is not set", async () => { @@ -244,27 +247,43 @@ describe("getRedisClient", () => { ); }); - it("creates and connects a Redis client", async () => { - process.env.ELASTICACHE_ENDPOINT = "localhost"; + it("throws when ELASTICACHE_IAM_USERNAME is not set", async () => { + process.env.ELASTICACHE_ENDPOINT = "cache.example.invalid"; + + await expect(getRedisClient()).rejects.toThrow( + "ELASTICACHE_IAM_USERNAME is required", + ); + }); + + it("creates and connects a Redis client with IAM token", async () => { + process.env.ELASTICACHE_ENDPOINT = "cache.example.invalid"; + process.env.ELASTICACHE_CACHE_NAME = "my-cache"; + process.env.ELASTICACHE_IAM_USERNAME = "iam-user"; const client = await getRedisClient(); expect(client).toBeDefined(); + expect(mockPresign).toHaveBeenCalled(); expect(mockConnect).toHaveBeenCalled(); }); - it("returns cached client when already open", async () => { - process.env.ELASTICACHE_ENDPOINT = "localhost"; + it("returns cached client when already open and token is valid", async () => { + process.env.ELASTICACHE_ENDPOINT = "cache.example.invalid"; + 
process.env.ELASTICACHE_CACHE_NAME = "my-cache"; + process.env.ELASTICACHE_IAM_USERNAME = "iam-user"; const first = await getRedisClient(); const second = await getRedisClient(); expect(first).toBe(second); expect(mockConnect).toHaveBeenCalledTimes(1); + expect(mockPresign).toHaveBeenCalledTimes(1); }); it("registers error handler on client", async () => { - process.env.ELASTICACHE_ENDPOINT = "localhost"; + process.env.ELASTICACHE_ENDPOINT = "cache.example.invalid"; + process.env.ELASTICACHE_CACHE_NAME = "my-cache"; + process.env.ELASTICACHE_IAM_USERNAME = "iam-user"; await getRedisClient(); diff --git a/lambdas/https-client-lambda/src/__tests__/handler.test.ts b/lambdas/https-client-lambda/src/__tests__/handler.test.ts index 4f665229..33d743cc 100644 --- a/lambdas/https-client-lambda/src/__tests__/handler.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/handler.test.ts @@ -397,6 +397,27 @@ describe("processRecords", () => { expect(emitCircuitBreakerOpen).toHaveBeenCalledWith("target-1"); }); + it("does not emit CircuitBreakerOpen when recordResult returns failed", async () => { + const targetCb = { + ...DEFAULT_TARGET, + delivery: { circuitBreaker: { enabled: true } }, + }; + mockLoadTargetConfig.mockResolvedValue(targetCb); + mockDeliverPayload.mockResolvedValue({ + outcome: "transient_failure", + statusCode: 503, + }); + mockRecordResult.mockResolvedValue({ ok: false, state: "failed" }); + + const { emitCircuitBreakerOpen } = jest.requireMock( + "services/delivery-metrics", + ); + + await processRecords([makeRecord()]); + + expect(emitCircuitBreakerOpen).not.toHaveBeenCalled(); + }); + it("does not emit CircuitBreakerOpen when recordResult returns closed", async () => { const targetCb = { ...DEFAULT_TARGET, diff --git a/lambdas/https-client-lambda/src/handler.ts b/lambdas/https-client-lambda/src/handler.ts index cf12c56a..e30d5502 100644 --- a/lambdas/https-client-lambda/src/handler.ts +++ b/lambdas/https-client-lambda/src/handler.ts @@ -37,7 +37,7 @@ 
import { flushMetrics } from "services/delivery-metrics"; type RedisClientType = Awaited>; -const DEFAULT_MAX_RETRY_DURATION_MS = 3_600_000; +const DEFAULT_MAX_RETRY_DURATION_MS = 7_200_000; const DEFAULT_CONCURRENCY_LIMIT = 5; const gateConfig: EndpointGateConfig = { @@ -122,7 +122,7 @@ async function handleDeliveryResult( const backoffSec = jitteredBackoffSeconds(receiveCount); if (cbEnabled) { const cbOutcome = await recordResult(redis, targetId, false, gateConfig); - if (!cbOutcome.ok) { + if (cbOutcome.state === "opened") { recordCircuitBreakerOpen(targetId); } } diff --git a/lambdas/https-client-lambda/src/services/admit.lua b/lambdas/https-client-lambda/src/services/admit.lua index 8fdf2b15..fd56decb 100644 --- a/lambdas/https-client-lambda/src/services/admit.lua +++ b/lambdas/https-client-lambda/src/services/admit.lua @@ -1,95 +1,203 @@ --- admit.lua --- Atomic token-bucket rate limiter + circuit-breaker admission check. --- KEYS[1] = rl:{targetId} (rate limiter hash) --- KEYS[2] = cb:{targetId} (circuit breaker hash) --- ARGV[1] = now (epoch ms) --- ARGV[2] = refillPerSec (tokens/sec from target config) --- ARGV[3] = capacity (burst capacity) --- ARGV[4] = cbProbeIntervalMs --- ARGV[5] = cbEnabled ("1" or "0") --- ARGV[6] = decayPeriodMs - -local rl_key = KEYS[1] -local cb_key = KEYS[2] -local now = tonumber(ARGV[1]) -local refillPerSec = tonumber(ARGV[2]) -local capacity = tonumber(ARGV[3]) -local cbProbeIntervalMs = tonumber(ARGV[4]) -local cbEnabled = ARGV[5] == "1" -local decayPeriodMs = tonumber(ARGV[6]) - --- Load circuit breaker state -local opened_until_ms = tonumber(redis.call("HGET", cb_key, "opened_until_ms") or "0") or 0 -local last_probe_ms = tonumber(redis.call("HGET", cb_key, "last_probe_ms") or "0") or 0 - --- Circuit breaker evaluation (only when enabled) -if cbEnabled and opened_until_ms > 0 and now < opened_until_ms then - -- Circuit is open — check for probe slot - if cbProbeIntervalMs > 0 and (now - last_probe_ms) >= cbProbeIntervalMs 
then - redis.call("HSET", cb_key, "last_probe_ms", tostring(now)) - return cjson.encode({ - allowed = true, - probe = true, - effectiveRate = 0, - }) +-- admit.lua — Decides whether a request to an endpoint is allowed. +-- +-- Three sequential checks run atomically: +-- 1. Circuit breaker — is the endpoint currently healthy? +-- 2. Sliding window — roll the two-window error-rate accounting state if needed +-- 3. Token bucket — is the endpoint within its rate limit? +-- +-- A request is allowed only when all three checks pass. +-- +-- While the circuit is open, a timed probe is let through at most once per +-- cbProbeIntervalMs so the caller can test whether the endpoint has recovered. +-- The probe bypasses the rate limit — counting it here would skew a +-- low-volume probe signal against the recovery decision. +-- +-- After the circuit closes, the token fill rate ramps up linearly from +-- near-zero to full over decayPeriodMs to avoid a thundering herd on recovery. +-- +-- Returns: { allowed (0|1), reason, retryAfterMs, effectiveRate } + +-- Keys +local cbKey = KEYS[1] -- cb:{endpoint} circuit breaker state hash +local rlKey = KEYS[2] -- rl:{endpoint} rate limiter state hash + +-- Arguments +local now = tonumber(ARGV[1]) or 0 -- current wall-clock time (ms) +local capacity = tonumber(ARGV[2]) or 0 -- token bucket maximum capacity +local refillPerSec = tonumber(ARGV[3]) or 0 -- full token fill rate (tokens/sec) +local cooldownMs = tonumber(ARGV[4]) or 0 -- how long the circuit stays open (ms) +local decayPeriodMs = tonumber(ARGV[5]) or 0 -- ramp-up window after circuit closes (ms) +local cbWindowPeriodMs = tonumber(ARGV[6]) or 0 -- error-rate sliding window duration (ms) +local cbProbeIntervalMs = tonumber(ARGV[7]) or 0 -- minimum gap between probe requests (ms; 0 = no probes) + +-- TTL policy: circuit breaker state must outlive the cooldown window so that +-- the ramp-up period remains visible to subsequent calls after a close. 
+-- Rate limiter state needs only a short idle window. +local cbTtlSeconds = math.ceil(cooldownMs / 1000) + 60 +local rlTtlSeconds = 120 + +-------------------------------------------------------------------------------- +-- LOAD STATE +-------------------------------------------------------------------------------- + +local cb = redis.call("HMGET", cbKey, + "opened_until_ms", "cb_window_from", "cb_failures", "cb_attempts", "last_probe_ms", + "cb_prev_failures", "cb_prev_attempts") +local openedUntil = tonumber(cb[1] or "0") +local cbWindowFrom = tonumber(cb[2] or "0") +local cbFailures = tonumber(cb[3] or "0") +local cbAttempts = tonumber(cb[4] or "0") +local lastProbeMs = tonumber(cb[5] or "0") +local cbPrevFailures = tonumber(cb[6] or "0") +local cbPrevAttempts = tonumber(cb[7] or "0") + +local rl = redis.call("HMGET", rlKey, "tokens", "last_refill_ms") +local tokens = tonumber(rl[1] or capacity) +local lastRefill = tonumber(rl[2] or now) + +-------------------------------------------------------------------------------- +-- 1. CIRCUIT BREAKER +-- +-- The circuit is open when openedUntil is set and has not yet elapsed. +-- All requests are rejected while open to give the endpoint time to recover. +-- +-- Timed probes: once per cbProbeIntervalMs a single request is allowed +-- through even while the circuit is open. The caller must record the +-- outcome via record-result.lua; a successful probe will close the circuit +-- and trigger the ramp-up phase. 
+-------------------------------------------------------------------------------- + +if openedUntil > 0 and now < openedUntil then + -- Allow a probe through if the probe interval has elapsed + if cbProbeIntervalMs > 0 and (now - lastProbeMs) >= cbProbeIntervalMs then + lastProbeMs = now + redis.call("HSET", cbKey, + "opened_until_ms", openedUntil, + "cb_window_from", cbWindowFrom, + "cb_failures", cbFailures, + "cb_attempts", cbAttempts, + "last_probe_ms", lastProbeMs, + "cb_prev_failures", cbPrevFailures, + "cb_prev_attempts", cbPrevAttempts + ) + redis.call("EXPIRE", cbKey, cbTtlSeconds) + return { 1, "probe", 0, 0 } end - -- No probe slot available - local retryAfterMs = opened_until_ms - now - return cjson.encode({ - allowed = false, - reason = "circuit_open", - retryAfterMs = retryAfterMs, - effectiveRate = 0, - }) + + -- Circuit is open and no probe slot is available — reject + return { 0, "circuit_open", openedUntil - now, 0 } end --- Compute effective rate (with decay scaling if applicable) -local effectiveRate = refillPerSec +-------------------------------------------------------------------------------- +-- 2. SLIDING WINDOW +-- +-- Two windows (current + previous) together approximate a sliding window over +-- cbWindowPeriodMs. When the current window expires it is promoted to previous +-- and a fresh current window starts. record-result.lua blends the two windows +-- using a time-based weight to smooth the error rate across the boundary rather +-- than resetting it to zero at expiry. +-- +-- record-result.lua is responsible for incrementing the counters; this script +-- is only responsible for rolling the window boundary forward when it expires. 
+-------------------------------------------------------------------------------- -if cbEnabled and opened_until_ms > 0 and now >= opened_until_ms and decayPeriodMs > 0 then - local elapsed_since_close = now - opened_until_ms - if elapsed_since_close < decayPeriodMs then - effectiveRate = refillPerSec * (elapsed_since_close / decayPeriodMs) - if effectiveRate < 0.001 then - effectiveRate = 0.001 - end +if cbWindowFrom == 0 then + -- No window exists yet — start one now + cbWindowFrom = now +elseif (now - cbWindowFrom) > cbWindowPeriodMs then + -- Current window has expired — roll it forward + if (now - cbWindowFrom) > (2 * cbWindowPeriodMs) then + -- Both current and previous windows are stale: a long quiet period means + -- old failure counts are no longer relevant to the health of the endpoint. + cbPrevFailures = 0 + cbPrevAttempts = 0 + else + -- Promote current → previous so it can be blended with the new current window + cbPrevFailures = cbFailures + cbPrevAttempts = cbAttempts end + cbFailures = 0 + cbAttempts = 0 + cbWindowFrom = now end --- Load rate limiter state -local tokens = tonumber(redis.call("HGET", rl_key, "tokens") or "") or capacity -local last_refill_ms = tonumber(redis.call("HGET", rl_key, "last_refill_ms") or "") or now +-------------------------------------------------------------------------------- +-- 3. TOKEN BUCKET +-- +-- Refills tokens based on elapsed time, then tries to consume one. +-- If no tokens are available the request is rate-limited. +-- +-- Ramp-up: after the circuit closes (openedUntil is set but in the past), +-- effectiveRate scales linearly from near-zero to the full refillPerSec over +-- decayPeriodMs. This deliberately slows recovery traffic so a flapping +-- endpoint is not immediately overwhelmed. +-- Once decayPeriodMs elapses, openedUntil is cleared and the full rate resumes. 
+-------------------------------------------------------------------------------- --- Refill tokens -local elapsed_ms = now - last_refill_ms -if elapsed_ms > 0 then - tokens = math.min(capacity, tokens + elapsed_ms * effectiveRate / 1000) -end +local effectiveRate = refillPerSec --- Check rate limit -if tokens < 1 then - -- Compute retry-after based on effective rate - local retryAfterMs = 0 - if effectiveRate > 0 then - retryAfterMs = math.ceil((1 - tokens) / effectiveRate * 1000) +if openedUntil > 0 and now > openedUntil and decayPeriodMs > 0 then + -- Circuit has recently closed — apply linear ramp-up + local sinceClose = now - openedUntil + if sinceClose >= decayPeriodMs then + -- Decay period fully elapsed — restore full rate and clear the CB timestamp + openedUntil = 0 else - retryAfterMs = 1000 + -- Still within decay period — scale fill rate proportionally to time elapsed + local fraction = sinceClose / decayPeriodMs + effectiveRate = math.max(1, math.floor(refillPerSec * fraction)) + end +end + +-- Refill tokens based on time elapsed since last refill +local elapsed = now - lastRefill +if elapsed > 0 then + local refill = math.floor((elapsed * effectiveRate) / 1000) + if refill > 0 then + tokens = math.min(capacity, tokens + refill) + lastRefill = now end - return cjson.encode({ - allowed = false, - reason = "rate_limited", - retryAfterMs = retryAfterMs, - effectiveRate = effectiveRate, - }) end --- Deduct token and update state +-- Not enough tokens — rate-limited +-- TTL is intentionally not refreshed here; it was set on the last allowed call. 
+if tokens < 1 then + redis.call("HSET", cbKey, + "opened_until_ms", openedUntil, + "cb_window_from", cbWindowFrom, + "cb_failures", cbFailures, + "cb_attempts", cbAttempts, + "cb_prev_failures", cbPrevFailures, + "cb_prev_attempts", cbPrevAttempts + ) + redis.call("HSET", rlKey, + "tokens", tokens, + "last_refill_ms", lastRefill + ) + return { 0, "rate_limited", 1000, effectiveRate } +end + +-- Consume one token tokens = tokens - 1 -redis.call("HSET", rl_key, "tokens", tostring(tokens)) -redis.call("HSET", rl_key, "last_refill_ms", tostring(now)) - -return cjson.encode({ - allowed = true, - probe = false, - effectiveRate = effectiveRate, -}) + +-------------------------------------------------------------------------------- +-- 4. PERSIST STATE AND ALLOW +-------------------------------------------------------------------------------- + +redis.call("HSET", cbKey, + "opened_until_ms", openedUntil, + "cb_window_from", cbWindowFrom, + "cb_failures", cbFailures, + "cb_attempts", cbAttempts, + "cb_prev_failures", cbPrevFailures, + "cb_prev_attempts", cbPrevAttempts +) +redis.call("HSET", rlKey, + "tokens", tokens, + "last_refill_ms", lastRefill +) + +redis.call("EXPIRE", cbKey, cbTtlSeconds) +redis.call("EXPIRE", rlKey, rlTtlSeconds) + +return { 1, "allowed", 0, effectiveRate } diff --git a/lambdas/https-client-lambda/src/services/config-loader.ts b/lambdas/https-client-lambda/src/services/config-loader.ts index 11aa7c34..7f5b7bdc 100644 --- a/lambdas/https-client-lambda/src/services/config-loader.ts +++ b/lambdas/https-client-lambda/src/services/config-loader.ts @@ -16,8 +16,7 @@ function getCache(): ConfigSubscriptionCache { } const ttlMs = - Number(process.env.CLIENT_SUBSCRIPTION_CACHE_TTL_SECONDS) * 1000 || - 300_000; + (Number(process.env.CLIENT_SUBSCRIPTION_CACHE_TTL_SECONDS) || 300) * 1000; cache = new ConfigSubscriptionCache({ s3Client, diff --git a/lambdas/https-client-lambda/src/services/endpoint-gate.ts 
b/lambdas/https-client-lambda/src/services/endpoint-gate.ts index e54f9e69..c0dd1b60 100644 --- a/lambdas/https-client-lambda/src/services/endpoint-gate.ts +++ b/lambdas/https-client-lambda/src/services/endpoint-gate.ts @@ -1,4 +1,7 @@ import { type RedisClientType, createClient } from "@redis/client"; +import { SignatureV4 } from "@smithy/signature-v4"; +import { Sha256 } from "@aws-crypto/sha256-js"; +import { fromNodeProviderChain } from "@aws-sdk/credential-providers"; import { createHash } from "node:crypto"; import { logger } from "@nhs-notify-client-callbacks/logger"; import admitLuaSrc from "services/admit.lua"; @@ -21,7 +24,7 @@ export type AdmitResult = AdmitResultAllowed | AdmitResultDenied; export type RecordResultOutcome = | { ok: true; state: "closed" } - | { ok: false; state: "opened" }; + | { ok: false; state: "opened" | "failed" }; export type EndpointGateConfig = { burstCapacity: number; @@ -47,7 +50,7 @@ async function evalScript( sha: string, keys: string[], args: string[], -): Promise { +): Promise { const keyCount = keys.length.toString(); try { return await client.sendCommand([ @@ -74,34 +77,50 @@ export async function admit( cbEnabled: boolean, config: EndpointGateConfig, ): Promise { - const rlKey = `rl:${targetId}`; const cbKey = `cb:${targetId}`; + const rlKey = `rl:${targetId}`; const now = Date.now().toString(); + const probeIntervalMs = cbEnabled ? config.cbProbeIntervalMs.toString() : "0"; - /* eslint-disable sonarjs/null-dereference -- refillPerSec is typed as number, cannot be null */ const args = [ now, - refillPerSec.toString(), config.burstCapacity.toString(), - config.cbProbeIntervalMs.toString(), - cbEnabled ? 
"1" : "0", + // eslint-disable-next-line sonarjs/null-dereference + refillPerSec.toString(), + config.cbCooldownMs.toString(), config.decayPeriodMs.toString(), + config.cbWindowPeriodMs.toString(), + probeIntervalMs, ]; - /* eslint-enable sonarjs/null-dereference */ if (!admitSha) { admitSha = computeSha1(admitLuaSrc); } - const result = await evalScript( + const raw = (await evalScript( client, admitLuaSrc, admitSha, - [rlKey, cbKey], + [cbKey, rlKey], args, - ); + )) as [number, string, number, number]; + + const [allowed, reason, retryAfterMs, effectiveRate] = raw; - return JSON.parse(result) as AdmitResult; + if (allowed === 1) { + return { + allowed: true, + probe: reason === "probe", + effectiveRate: Number(effectiveRate), + }; + } + + return { + allowed: false, + reason: reason as "circuit_open" | "rate_limited", + retryAfterMs: Number(retryAfterMs), + effectiveRate: Number(effectiveRate), + }; } export async function recordResult( @@ -116,26 +135,32 @@ export async function recordResult( const args = [ now, success ? 
"1" : "0", - config.cbWindowPeriodMs.toString(), - config.cbErrorThreshold.toString(), - config.cbMinAttempts.toString(), config.cbCooldownMs.toString(), config.decayPeriodMs.toString(), + config.cbErrorThreshold.toString(), + config.cbMinAttempts.toString(), + config.cbWindowPeriodMs.toString(), ]; if (!recordResultSha) { recordResultSha = computeSha1(recordResultLuaSrc); } - const result = await evalScript( + const raw = (await evalScript( client, recordResultLuaSrc, recordResultSha, [cbKey], args, - ); + )) as [number, string]; - return JSON.parse(result) as RecordResultOutcome; + const [ok, state] = raw; + + if (ok === 1) { + return { ok: true, state: "closed" }; + } + + return { ok: false, state: state as "opened" | "failed" }; } export function resetAdmitSha(): void { @@ -143,10 +168,57 @@ export function resetAdmitSha(): void { recordResultSha = undefined; } +const TOKEN_EXPIRY_SECONDS = 900; +const TOKEN_REFRESH_BUFFER_SECONDS = 60; + let redisClient: RedisClientType | undefined; +let tokenExpiry = 0; + +async function generateElastiCacheIamToken(): Promise { + const cacheName = process.env.ELASTICACHE_CACHE_NAME; + const endpoint = process.env.ELASTICACHE_ENDPOINT; + const username = process.env.ELASTICACHE_IAM_USERNAME; + + if (!cacheName || !endpoint || !username) { + throw new Error( + "ELASTICACHE_CACHE_NAME, ELASTICACHE_ENDPOINT, and ELASTICACHE_IAM_USERNAME are required", + ); + } + + const region = process.env.AWS_REGION ?? 
"eu-west-2"; + + const signer = new SignatureV4({ + credentials: fromNodeProviderChain(), + region, + service: "elasticache", + sha256: Sha256, + }); + + const signed = await signer.presign( + { + protocol: "https:", + method: "GET", + hostname: endpoint, + path: "/", + query: { Action: "connect", User: username }, + headers: { host: endpoint }, + }, + { expiresIn: TOKEN_EXPIRY_SECONDS }, + ); + + tokenExpiry = Date.now() + TOKEN_EXPIRY_SECONDS * 1000; + + const qs = new URLSearchParams( + signed.query as Record, + ).toString(); + return `https://${signed.hostname}${signed.path}?${qs}`; +} export async function getRedisClient(): Promise { - if (redisClient?.isOpen) { + const isTokenValid = + tokenExpiry > Date.now() + TOKEN_REFRESH_BUFFER_SECONDS * 1000; + + if (redisClient?.isOpen && isTokenValid) { return redisClient; } @@ -155,7 +227,23 @@ export async function getRedisClient(): Promise { throw new Error("ELASTICACHE_ENDPOINT is required"); } - redisClient = createClient({ url: `rediss://${endpoint}:6379` }); + const username = process.env.ELASTICACHE_IAM_USERNAME; + if (!username) { + throw new Error("ELASTICACHE_IAM_USERNAME is required"); + } + + if (redisClient?.isOpen) { + await redisClient.disconnect(); + } + + const token = await generateElastiCacheIamToken(); + + redisClient = createClient({ + url: `rediss://${endpoint}:6379`, + username, + password: token, + }); + redisClient.on("error", (err) => { logger.error("Redis connection error", { error: String(err) }); }); @@ -166,4 +254,5 @@ export async function getRedisClient(): Promise { export function resetRedisClient(): void { redisClient = undefined; + tokenExpiry = 0; } diff --git a/lambdas/https-client-lambda/src/services/record-result.lua b/lambdas/https-client-lambda/src/services/record-result.lua index 89b81279..1cc94857 100644 --- a/lambdas/https-client-lambda/src/services/record-result.lua +++ b/lambdas/https-client-lambda/src/services/record-result.lua @@ -1,92 +1,144 @@ --- record-result.lua 
--- Atomic two-window sliding circuit-breaker state update. --- KEYS[1] = cb:{targetId} (circuit breaker hash) --- ARGV[1] = now (epoch ms) --- ARGV[2] = success ("1" or "0") --- ARGV[3] = cbWindowPeriodMs --- ARGV[4] = cbErrorThreshold (float, e.g. "0.5") --- ARGV[5] = cbMinAttempts (integer) --- ARGV[6] = cbCooldownMs --- ARGV[7] = decayPeriodMs - -local cb_key = KEYS[1] -local now = tonumber(ARGV[1]) -local success = ARGV[2] == "1" -local windowPeriodMs = tonumber(ARGV[3]) -local errorThreshold = tonumber(ARGV[4]) -local minAttempts = tonumber(ARGV[5]) -local cooldownMs = tonumber(ARGV[6]) -local decayPeriodMs = tonumber(ARGV[7]) - --- Load current state -local opened_until_ms = tonumber(redis.call("HGET", cb_key, "opened_until_ms") or "0") or 0 -local cb_window_from = tonumber(redis.call("HGET", cb_key, "cb_window_from") or "0") or 0 -local cb_failures = tonumber(redis.call("HGET", cb_key, "cb_failures") or "0") or 0 -local cb_attempts = tonumber(redis.call("HGET", cb_key, "cb_attempts") or "0") or 0 -local cb_prev_failures = tonumber(redis.call("HGET", cb_key, "cb_prev_failures") or "0") or 0 -local cb_prev_attempts = tonumber(redis.call("HGET", cb_key, "cb_prev_attempts") or "0") or 0 - --- Initialise window if not set -if cb_window_from == 0 then - cb_window_from = now -end +-- record-result.lua — Records the outcome of a delivery attempt. +-- +-- Updates the circuit breaker's error-rate window counters and opens the +-- circuit if the failure rate exceeds the configured threshold. +-- +-- On success: +-- Window counters are left intact. The openedUntil timestamp is preserved +-- while the decay period is still active so that admit.lua can continue +-- computing the linear ramp-up rate. Once the decay period elapses it +-- is zeroed, returning the circuit to a fully clean closed state. +-- +-- On failure: +-- The failure and attempt counters are incremented. 
A two-window sliding +-- blend is computed before evaluating the trip condition: +-- slidingAttempts = cbAttempts + cbPrevAttempts * prevWeight +-- slidingFailures = cbFailures + cbPrevFailures * prevWeight +-- where prevWeight decays linearly from 1.0 → 0.0 as the current window ages, +-- so previous-window failures fade out gradually rather than dropping off a cliff. +-- The circuit opens when: +-- • the endpoint is not already open (prevents double-tripping and +-- resetting the cooldown timer prematurely), AND +-- • slidingAttempts >= cbMinAttempts (avoids tripping on statistically +-- insignificant data at cold start or just after a window roll), AND +-- • slidingFailures / slidingAttempts exceeds cbErrorThreshold. +-- On open, all counters (current and previous) are reset to zero so the +-- fresh cooldown window begins with a clean slate ready for recovery. +-- +-- Returns: { ok (0|1), state } +-- state: "closed" | "opened" | "failed" --- Check for window expiry and roll -if (now - cb_window_from) >= windowPeriodMs then - cb_prev_failures = cb_failures - cb_prev_attempts = cb_attempts - cb_failures = 0 - cb_attempts = 0 - cb_window_from = now -end +-- Keys +local cbKey = KEYS[1] -- cb:{endpoint} circuit breaker state hash --- Increment counters -cb_attempts = cb_attempts + 1 -if not success then - cb_failures = cb_failures + 1 -end +-- Arguments +local now = tonumber(ARGV[1]) or 0 -- current wall-clock time (ms) +local success = tonumber(ARGV[2]) or 0 -- 1 = success, 0 = failure +local cooldownMs = tonumber(ARGV[3]) or 0 -- how long the circuit stays open (ms) +local decayPeriodMs = tonumber(ARGV[4]) or 0 -- ramp-up window after circuit closes (ms) +local cbErrorThreshold = tonumber(ARGV[5]) or 0 -- error-rate fraction that trips the circuit (e.g. 
0.5) +local cbMinAttempts = tonumber(ARGV[6]) or 0 -- minimum samples before the circuit can trip +local cbWindowPeriodMs = tonumber(ARGV[7]) or 0 -- error-rate sliding window duration (ms) --- Compute two-window blended error rate -local elapsed_in_window = now - cb_window_from -local prev_weight = 0 -if windowPeriodMs > 0 and elapsed_in_window < windowPeriodMs then - prev_weight = 1 - (elapsed_in_window / windowPeriodMs) +-- TTL policy: keep circuit breaker state alive for at least the cooldown +-- duration plus a buffer so the decay period remains visible after a close. +local cbTtlSeconds = math.ceil(cooldownMs / 1000) + 60 + +local function refreshCbExpiry() + redis.call("EXPIRE", cbKey, cbTtlSeconds) end -local blended_failures = cb_prev_failures * prev_weight + cb_failures -local blended_attempts = cb_prev_attempts * prev_weight + cb_attempts +-------------------------------------------------------------------------------- +-- LOAD CURRENT STATE +-------------------------------------------------------------------------------- -local state = "closed" +local cb = redis.call("HMGET", cbKey, + "opened_until_ms", "cb_window_from", "cb_failures", "cb_attempts", + "cb_prev_failures", "cb_prev_attempts") +local openedUntil = tonumber(cb[1] or "0") +local cbWindowFrom = tonumber(cb[2] or "0") +local cbFailures = tonumber(cb[3] or "0") +local cbAttempts = tonumber(cb[4] or "0") +local cbPrevFailures = tonumber(cb[5] or "0") +local cbPrevAttempts = tonumber(cb[6] or "0") --- Check if we should open the circuit -if blended_attempts >= minAttempts and blended_attempts > 0 then - local error_rate = blended_failures / blended_attempts - if error_rate >= errorThreshold then - opened_until_ms = now + cooldownMs - state = "opened" - end -end +-- Every outcome (success or failure) contributes to the error-rate window +cbAttempts = cbAttempts + 1 + +-------------------------------------------------------------------------------- +-- SUCCESS — preserve openedUntil during decay, 
then zero it +-- +-- admit.lua uses openedUntil to calculate the linear ramp-up rate while the +-- decay period is active. That timestamp must survive in Redis until the +-- decay period ends. Clearing it prematurely would snap the fill rate back +-- to full immediately rather than ramping gradually. +-------------------------------------------------------------------------------- --- During active decay, preserve opened_until_ms as decay start marker -if opened_until_ms > 0 and now >= opened_until_ms then - local elapsed_since_close = now - opened_until_ms - if elapsed_since_close >= decayPeriodMs then - opened_until_ms = 0 - end +if success == 1 then + -- Keep openedUntil only if we are still within the decay window + local inDecayWindow = openedUntil > 0 and now > openedUntil and (now - openedUntil) < decayPeriodMs + local preservedOpenedUntil = inDecayWindow and openedUntil or 0 + + redis.call("HSET", cbKey, + "opened_until_ms", preservedOpenedUntil, + "cb_window_from", cbWindowFrom, + "cb_failures", cbFailures, + "cb_attempts", cbAttempts, + "cb_prev_failures", cbPrevFailures, + "cb_prev_attempts", cbPrevAttempts + ) + refreshCbExpiry() + return { 1, "closed" } end --- Write updated state -redis.call("HSET", cb_key, - "opened_until_ms", tostring(opened_until_ms), - "cb_window_from", tostring(cb_window_from), - "cb_failures", tostring(cb_failures), - "cb_attempts", tostring(cb_attempts), - "cb_prev_failures", tostring(cb_prev_failures), - "cb_prev_attempts", tostring(cb_prev_attempts) -) +-------------------------------------------------------------------------------- +-- FAILURE — increment counter and evaluate whether to open the circuit +-- +-- The trip condition is evaluated against a sliding blend of current and +-- previous window counts, not the raw current-window counts alone. This +-- prevents a burst of failures from escaping detection simply because it +-- straddles a window boundary and gets partially discarded by a reset. 
+-------------------------------------------------------------------------------- + +cbFailures = cbFailures + 1 -if state == "opened" then - return cjson.encode({ ok = false, state = "opened" }) +-- The circuit is already open when openedUntil is set and has not yet elapsed. +-- Guard against double-tripping, which would reset the cooldown timer early. +local circuitAlreadyOpen = openedUntil > 0 and now < openedUntil + +-- Blend current and previous window counts. +-- prevWeight decays linearly from 1.0 → 0.0 as the current window ages, +-- so previous-window failures fade out gradually rather than dropping off a cliff. +local windowElapsed = cbWindowFrom > 0 and (now - cbWindowFrom) or 0 +local hasWindow = cbWindowPeriodMs > 0 +local prevWeight = hasWindow and math.max(0, (cbWindowPeriodMs - windowElapsed) / cbWindowPeriodMs) or 0 +local slidingFailures = cbFailures + cbPrevFailures * prevWeight +local slidingAttempts = cbAttempts + cbPrevAttempts * prevWeight + +if not circuitAlreadyOpen + and slidingAttempts >= cbMinAttempts -- enough data to be statistically meaningful + and (slidingFailures / slidingAttempts) > cbErrorThreshold then + -- Trip the circuit — reset all counters so recovery starts from a clean slate + redis.call("HSET", cbKey, + "opened_until_ms", now + cooldownMs, + "cb_window_from", 0, + "cb_failures", 0, + "cb_attempts", 0, + "cb_prev_failures", 0, + "cb_prev_attempts", 0 + ) + refreshCbExpiry() + return { 0, "opened" } end -return cjson.encode({ ok = true, state = "closed" }) +-- Below the threshold — record the failure but keep the circuit closed +redis.call("HSET", cbKey, + "opened_until_ms", openedUntil, + "cb_window_from", cbWindowFrom, + "cb_failures", cbFailures, + "cb_attempts", cbAttempts, + "cb_prev_failures", cbPrevFailures, + "cb_prev_attempts", cbPrevAttempts +) +refreshCbExpiry() +return { 0, "failed" } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c96b8ef9..0aeb10bd 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -40,6 
+40,9 @@ catalogs: specifier: ^4.3.6 version: 4.3.6 aws: + '@aws-crypto/sha256-js': + specifier: ^5.2.0 + version: 5.2.0 '@aws-sdk/client-cloudwatch': specifier: ^3.1025.0 version: 3.1029.0 @@ -64,6 +67,9 @@ catalogs: '@aws-sdk/credential-providers': specifier: ^3.1023.0 version: 3.1026.0 + '@smithy/signature-v4': + specifier: ^5.0.0 + version: 5.3.13 lint: '@eslint/js': specifier: ^9.39.4 @@ -328,6 +334,9 @@ importers: lambdas/https-client-lambda: dependencies: + '@aws-crypto/sha256-js': + specifier: catalog:aws + version: 5.2.0 '@aws-sdk/client-s3': specifier: catalog:aws version: 3.1029.0 @@ -340,6 +349,9 @@ importers: '@aws-sdk/client-ssm': specifier: catalog:aws version: 3.1029.0 + '@aws-sdk/credential-providers': + specifier: catalog:aws + version: 3.1026.0 '@nhs-notify-client-callbacks/config-subscription-cache': specifier: workspace:* version: link:../../src/config-subscription-cache @@ -352,6 +364,9 @@ importers: '@redis/client': specifier: catalog:app version: 1.6.1 + '@smithy/signature-v4': + specifier: catalog:aws + version: 5.3.13 aws-embedded-metrics: specifier: catalog:app version: 4.2.1 diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index f8f8a39b..52fb20f0 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -26,8 +26,10 @@ catalogs: "@aws-sdk/client-secrets-manager": "^3.1023.0" "@aws-sdk/client-sqs": "^3.1023.0" "@aws-sdk/client-ssm": "^3.1025.0" + "@aws-crypto/sha256-js": "^5.2.0" "@aws-sdk/client-sts": "^3.1023.0" "@aws-sdk/credential-providers": "^3.1023.0" + "@smithy/signature-v4": "^5.0.0" lint: "@eslint/js": "^9.39.4" "@stylistic/eslint-plugin": "^5.10.0" From a53ed730e00fc6e950ccfe66163c609d132a5b56 Mon Sep 17 00:00:00 2001 From: Rhys Cox Date: Mon, 20 Apr 2026 11:18:20 +0100 Subject: [PATCH 12/65] CCM-16073 - PR feedback --- .../terraform/components/callbacks/README.md | 2 +- .../callbacks/elasticache_delivery_state.tf | 14 +++++++------- .../terraform/components/callbacks/variables.tf | 6 +++--- 
.../src/__tests__/config-loader.test.ts | 8 ++++++++ .../src/__tests__/endpoint-gate.test.ts | 9 +++++++++ .../src/__tests__/handler.test.ts | 17 +++++++++++++++++ 6 files changed, 45 insertions(+), 11 deletions(-) diff --git a/infrastructure/terraform/components/callbacks/README.md b/infrastructure/terraform/components/callbacks/README.md index e8417f83..8ec08700 100644 --- a/infrastructure/terraform/components/callbacks/README.md +++ b/infrastructure/terraform/components/callbacks/README.md @@ -19,7 +19,7 @@ | [component](#input\_component) | The variable encapsulating the name of this component | `string` | `"callbacks"` | no | | [default\_tags](#input\_default\_tags) | A map of default tags to apply to all taggable resources within the component | `map(string)` | `{}` | no | | [deploy\_mock\_clients](#input\_deploy\_mock\_clients) | Flag to deploy mock webhook lambda for integration testing (test/dev environments only) | `bool` | `false` | no | -| [elasticache\_data\_storage\_maximum\_mb](#input\_elasticache\_data\_storage\_maximum\_mb) | Maximum data storage in MB for the ElastiCache Serverless delivery state cache | `number` | `1024` | no | +| [elasticache\_data\_storage\_maximum\_gb](#input\_elasticache\_data\_storage\_maximum\_gb) | Maximum data storage in GB for the ElastiCache Serverless delivery state cache | `number` | `1` | no | | [enable\_event\_anomaly\_detection](#input\_enable\_event\_anomaly\_detection) | Enable CloudWatch anomaly detection alarm for inbound event queue message reception | `bool` | `true` | no | | [enable\_xray\_tracing](#input\_enable\_xray\_tracing) | Enable AWS X-Ray active tracing for Lambda functions | `bool` | `false` | no | | [environment](#input\_environment) | The name of the tfscaffold environment | `string` | n/a | yes | diff --git a/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf b/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf index 3e90f293..c2f8fa86 100644 --- 
a/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf +++ b/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf @@ -13,8 +13,8 @@ resource "aws_elasticache_serverless_cache" "delivery_state" { cache_usage_limits { data_storage { - maximum = var.elasticache_data_storage_maximum_mb - unit = "MB" + maximum = var.elasticache_data_storage_maximum_gb + unit = "GB" } ecpu_per_second { @@ -94,12 +94,12 @@ resource "aws_cloudwatch_metric_alarm" "elasticache_storage_utilisation" { alarm_name = "${local.csi}-elasticache-storage-utilisation" alarm_description = join(" ", [ "CAPACITY: ElastiCache data storage utilisation exceeds 80%.", - "Review stored data or increase elasticache_data_storage_maximum_mb.", + "Review stored data or increase elasticache_data_storage_maximum_gb.", ]) comparison_operator = "GreaterThanThreshold" evaluation_periods = 2 - threshold = var.elasticache_data_storage_maximum_mb * 0.8 + threshold = var.elasticache_data_storage_maximum_gb * 0.8 actions_enabled = true treat_missing_data = "notBreaching" @@ -120,9 +120,9 @@ resource "aws_cloudwatch_metric_alarm" "elasticache_storage_utilisation" { } metric_query { - id = "storage_used_mb" - expression = "storage_used / 1048576" - label = "Storage Used (MB)" + id = "storage_used_gb" + expression = "storage_used / 1073741824" + label = "Storage Used (GB)" return_data = true } diff --git a/infrastructure/terraform/components/callbacks/variables.tf b/infrastructure/terraform/components/callbacks/variables.tf index 0e3ccd7f..0f2fc202 100644 --- a/infrastructure/terraform/components/callbacks/variables.tf +++ b/infrastructure/terraform/components/callbacks/variables.tf @@ -184,8 +184,8 @@ variable "mtls_cert_secret_arn" { default = "" } -variable "elasticache_data_storage_maximum_mb" { +variable "elasticache_data_storage_maximum_gb" { type = number - description = "Maximum data storage in MB for the ElastiCache Serverless delivery state cache" - default = 1024 + 
description = "Maximum data storage in GB for the ElastiCache Serverless delivery state cache" + default = 1 } diff --git a/lambdas/https-client-lambda/src/__tests__/config-loader.test.ts b/lambdas/https-client-lambda/src/__tests__/config-loader.test.ts index 94ca1d00..520a05ed 100644 --- a/lambdas/https-client-lambda/src/__tests__/config-loader.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/config-loader.test.ts @@ -143,6 +143,14 @@ describe("loadTargetConfig", () => { ); }); + it("throws when client config is not found", async () => { + mockS3Send.mockResolvedValue(makeS3Response(null)); + + await expect( + loadTargetConfig("unknown-client", "target-1"), + ).rejects.toThrow("Invalid client config for 'unknown-client'"); + }); + it("throws when target not found in config", async () => { mockS3Send.mockResolvedValue(makeS3Response(VALID_CONFIG)); diff --git a/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts b/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts index 620d77be..cf83107b 100644 --- a/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts @@ -255,6 +255,15 @@ describe("getRedisClient", () => { ); }); + it("throws when ELASTICACHE_CACHE_NAME is not set", async () => { + process.env.ELASTICACHE_ENDPOINT = "cache.example.invalid"; + process.env.ELASTICACHE_IAM_USERNAME = "iam-user"; + + await expect(getRedisClient()).rejects.toThrow( + "ELASTICACHE_CACHE_NAME, ELASTICACHE_ENDPOINT, and ELASTICACHE_IAM_USERNAME are required", + ); + }); + it("creates and connects a Redis client with IAM token", async () => { process.env.ELASTICACHE_ENDPOINT = "cache.example.invalid"; process.env.ELASTICACHE_CACHE_NAME = "my-cache"; diff --git a/lambdas/https-client-lambda/src/__tests__/handler.test.ts b/lambdas/https-client-lambda/src/__tests__/handler.test.ts index 33d743cc..8b24d5e8 100644 --- a/lambdas/https-client-lambda/src/__tests__/handler.test.ts +++ 
b/lambdas/https-client-lambda/src/__tests__/handler.test.ts @@ -364,6 +364,23 @@ describe("processRecords", () => { expect(mockRecordResult).not.toHaveBeenCalled(); }); + it("does not call recordResult when CB is disabled on transient failure", async () => { + const targetNoCb = { + ...DEFAULT_TARGET, + delivery: { circuitBreaker: { enabled: false } }, + }; + mockLoadTargetConfig.mockResolvedValue(targetNoCb); + mockDeliverPayload.mockResolvedValue({ + outcome: "transient_failure", + statusCode: 503, + }); + + await processRecords([makeRecord()]); + + expect(mockRecordResult).not.toHaveBeenCalled(); + expect(mockChangeVisibility).toHaveBeenCalled(); + }); + it("does not call recordResult when CB is disabled on success", async () => { const targetNoCb = { ...DEFAULT_TARGET, From 4e59c1c20c88922b87f1965634eb78929a538bcf Mon Sep 17 00:00:00 2001 From: Rhys Cox Date: Mon, 20 Apr 2026 11:36:37 +0100 Subject: [PATCH 13/65] CCM-16073 - PR feedback --- .../src/__tests__/config-loader.test.ts | 40 +++++++++++++++++++ .../src/__tests__/endpoint-gate.test.ts | 18 +++++++++ .../src/__tests__/https-client.test.ts | 36 +++++++++++++++++ .../src/__tests__/tls-agent-factory.test.ts | 28 +++++++++++++ 4 files changed, 122 insertions(+) diff --git a/lambdas/https-client-lambda/src/__tests__/config-loader.test.ts b/lambdas/https-client-lambda/src/__tests__/config-loader.test.ts index 520a05ed..5c133ca6 100644 --- a/lambdas/https-client-lambda/src/__tests__/config-loader.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/config-loader.test.ts @@ -1,4 +1,5 @@ import { GetObjectCommand } from "@aws-sdk/client-s3"; +import { ConfigSubscriptionCache } from "@nhs-notify-client-callbacks/config-subscription-cache"; import { loadTargetConfig, resetCache } from "services/config-loader"; @@ -158,4 +159,43 @@ describe("loadTargetConfig", () => { "Target 'nonexistent' not found in config for client 'client-1'", ); }); + + it("uses default prefix when CLIENT_SUBSCRIPTION_CONFIG_PREFIX is not 
set", async () => { + const saved = process.env.CLIENT_SUBSCRIPTION_CONFIG_PREFIX; + delete process.env.CLIENT_SUBSCRIPTION_CONFIG_PREFIX; + resetCache(); + mockS3Send.mockResolvedValue(makeS3Response(VALID_CONFIG)); + + await loadTargetConfig("client-1", "target-1"); + + const command: GetObjectCommand = mockS3Send.mock.calls[0][0]; + expect(command.input.Key).toBe("client_subscriptions/client-1.json"); + + process.env.CLIENT_SUBSCRIPTION_CONFIG_PREFIX = saved; + }); + + it("uses default TTL when CLIENT_SUBSCRIPTION_CACHE_TTL_SECONDS is not set", async () => { + const saved = process.env.CLIENT_SUBSCRIPTION_CACHE_TTL_SECONDS; + delete process.env.CLIENT_SUBSCRIPTION_CACHE_TTL_SECONDS; + resetCache(); + mockS3Send.mockResolvedValue(makeS3Response(VALID_CONFIG)); + + const result = await loadTargetConfig("client-1", "target-1"); + + expect(result).toEqual(VALID_TARGET); + + process.env.CLIENT_SUBSCRIPTION_CACHE_TTL_SECONDS = saved; + }); + + it("throws when loadClientConfig resolves to undefined", async () => { + const spy = jest + .spyOn(ConfigSubscriptionCache.prototype, "loadClientConfig") + .mockResolvedValueOnce(undefined); + + await expect(loadTargetConfig("client-1", "target-1")).rejects.toThrow( + "No configuration found for client 'client-1'", + ); + + spy.mockRestore(); + }); }); diff --git a/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts b/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts index cf83107b..394a89b6 100644 --- a/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts @@ -303,4 +303,22 @@ describe("getRedisClient", () => { )![1] as (err: Error) => void; errorHandler(new Error("test error")); }); + + it("disconnects existing client when token expires before reconnecting", async () => { + jest.useFakeTimers(); + process.env.ELASTICACHE_ENDPOINT = "cache.example.invalid"; + process.env.ELASTICACHE_CACHE_NAME = "my-cache"; + 
process.env.ELASTICACHE_IAM_USERNAME = "iam-user"; + + await getRedisClient(); + + jest.advanceTimersByTime(841_000); + + await getRedisClient(); + + expect(mockDisconnect).toHaveBeenCalledTimes(1); + expect(mockConnect).toHaveBeenCalledTimes(2); + + jest.useRealTimers(); + }); }); diff --git a/lambdas/https-client-lambda/src/__tests__/https-client.test.ts b/lambdas/https-client-lambda/src/__tests__/https-client.test.ts index bb1c0d11..e1850567 100644 --- a/lambdas/https-client-lambda/src/__tests__/https-client.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/https-client.test.ts @@ -251,4 +251,40 @@ describe("deliverPayload", () => { expect(result).toEqual({ outcome: "transient_failure", statusCode: 0 }); }); + + it("treats undefined statusCode as transient failure with code 0", async () => { + const mockReq = new EventEmitter() as EventEmitter & { + end: jest.Mock; + destroy: jest.Mock; + }; + mockReq.end = jest.fn(); + mockReq.destroy = jest.fn(); + + jest.spyOn(https, "request").mockImplementation((...args: unknown[]) => { + const callback = args.find((a) => typeof a === "function") as + | ((res: MockResponse) => void) + | undefined; + + const res: MockResponse = Object.assign(new EventEmitter(), { + statusCode: undefined as unknown as number, + headers: {}, + resume: jest.fn(), + }); + + if (callback) { + process.nextTick(() => callback(res)); + } + + return mockReq as unknown as ReturnType; + }); + + const result = await deliverPayload( + createTarget(), + '{"test":true}', + "sig-abc", + createMockAgent(), + ); + + expect(result).toEqual({ outcome: "transient_failure", statusCode: 0 }); + }); }); diff --git a/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts b/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts index 3f6f27ca..b2ca7877 100644 --- a/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts @@ -382,4 +382,32 @@ 
describe("tls-agent-factory", () => { ); expect(mockS3Send).not.toHaveBeenCalled(); }); + + it("uses default CERT_EXPIRY_THRESHOLD_MS when env var is not set", async () => { + delete process.env.CERT_EXPIRY_THRESHOLD_MS; + jest.resetModules(); + // @ts-expect-error -- modulePaths resolves at runtime + const mod = await import("services/delivery/tls-agent-factory"); + + mockS3PemResponse(COMBINED_PEM); + const agent = await mod.buildAgent( + createTarget({ delivery: { mtls: { enabled: true } } }), + ); + + expect(agent).toBeDefined(); + }); + + it("handles PEM with no private key or certificate sections", async () => { + mockS3Send.mockResolvedValue({ + Body: { + transformToString: jest.fn().mockResolvedValue("no-pem-content"), + }, + }); + + const agent = await buildAgent( + createTarget({ delivery: { mtls: { enabled: true } } }), + ); + + expect(agent).toBeDefined(); + }); }); From af98725f8ebe803e84d58f13dd8cea15de346fa3 Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Fri, 17 Apr 2026 15:11:21 +0100 Subject: [PATCH 14/65] Lua unit tests --- lambdas/https-client-lambda/package.json | 1 + .../src/__tests__/admit-lua.test.ts | 351 ++++++++++++++++++ .../src/__tests__/helpers/fengari.d.ts | 32 ++ .../src/__tests__/helpers/lua-redis-mock.ts | 144 +++++++ .../src/__tests__/record-result-lua.test.ts | 316 ++++++++++++++++ pnpm-lock.yaml | 29 ++ 6 files changed, 873 insertions(+) create mode 100644 lambdas/https-client-lambda/src/__tests__/admit-lua.test.ts create mode 100644 lambdas/https-client-lambda/src/__tests__/helpers/fengari.d.ts create mode 100644 lambdas/https-client-lambda/src/__tests__/helpers/lua-redis-mock.ts create mode 100644 lambdas/https-client-lambda/src/__tests__/record-result-lua.test.ts diff --git a/lambdas/https-client-lambda/package.json b/lambdas/https-client-lambda/package.json index d878392f..bc08ca4b 100644 --- a/lambdas/https-client-lambda/package.json +++ b/lambdas/https-client-lambda/package.json @@ -23,6 +23,7 @@ "@types/node": 
"catalog:tools", "@types/node-forge": "catalog:tools", "eslint": "catalog:lint", + "fengari": "^0.1.5", "jest": "catalog:test", "typescript": "catalog:tools" }, diff --git a/lambdas/https-client-lambda/src/__tests__/admit-lua.test.ts b/lambdas/https-client-lambda/src/__tests__/admit-lua.test.ts new file mode 100644 index 00000000..bf64f5e0 --- /dev/null +++ b/lambdas/https-client-lambda/src/__tests__/admit-lua.test.ts @@ -0,0 +1,351 @@ +import admitLuaSrc from "services/admit.lua"; +import { createRedisStore, evalLua } from "__tests__/helpers/lua-redis-mock"; + +type AdmitArgs = { + now: number; + refillPerSec: number; + capacity: number; + cbProbeIntervalMs: number; + cbEnabled: boolean; + decayPeriodMs: number; +}; + +const defaultArgs: AdmitArgs = { + now: 1_000_000, + refillPerSec: 10, + capacity: 10, + cbProbeIntervalMs: 60_000, + cbEnabled: true, + decayPeriodMs: 300_000, +}; + +function runAdmit( + store: ReturnType, + args: Partial = {}, + targetId = "t1", +) { + const merged = { ...defaultArgs, ...args }; + return evalLua( + admitLuaSrc, + [`rl:${targetId}`, `cb:${targetId}`], + [ + merged.now.toString(), + merged.refillPerSec.toString(), + merged.capacity.toString(), + merged.cbProbeIntervalMs.toString(), + merged.cbEnabled ? 
"1" : "0", + merged.decayPeriodMs.toString(), + ], + store, + ); +} + +describe("admit.lua", () => { + describe("rate limiting", () => { + it("allows the first request with full token bucket", () => { + const store = createRedisStore(); + const result = runAdmit(store); + + expect(result).toEqual({ + allowed: true, + probe: false, + effectiveRate: 10, + }); + }); + + it("deducts tokens on each call", () => { + const store = createRedisStore(); + + for (let i = 0; i < 10; i++) { + const result = runAdmit(store); + expect(result).toMatchObject({ allowed: true }); + } + + const result = runAdmit(store); + expect(result).toMatchObject({ + allowed: false, + reason: "rate_limited", + }); + }); + + it("returns retryAfterMs when rate limited", () => { + const store = createRedisStore(); + + for (let i = 0; i < 10; i++) { + runAdmit(store); + } + + const result = runAdmit(store); + expect(result).toMatchObject({ + allowed: false, + reason: "rate_limited", + retryAfterMs: expect.any(Number), + }); + expect((result as { retryAfterMs: number }).retryAfterMs).toBeGreaterThan( + 0, + ); + }); + + it("refills tokens over time", () => { + const store = createRedisStore(); + const now = 1_000_000; + + for (let i = 0; i < 10; i++) { + runAdmit(store, { now }); + } + + const denied = runAdmit(store, { now }); + expect(denied).toMatchObject({ allowed: false }); + + const later = now + 1000; + const result = runAdmit(store, { now: later }); + expect(result).toMatchObject({ allowed: true }); + }); + + it("caps tokens at capacity", () => { + const store = createRedisStore(); + + const result = runAdmit(store, { + now: 1_000_000, + capacity: 5, + refillPerSec: 100, + }); + expect(result).toMatchObject({ allowed: true }); + + const rlHash = store.get("rl:t1"); + const tokensRaw = rlHash?.get("tokens"); + expect(tokensRaw).toBeDefined(); + const tokens = Number.parseFloat(tokensRaw ?? 
""); + expect(tokens).toBeLessThanOrEqual(4); + }); + + it("handles zero refill rate", () => { + const store = createRedisStore(); + + for (let i = 0; i < 10; i++) { + runAdmit(store, { refillPerSec: 0 }); + } + + const result = runAdmit(store, { refillPerSec: 0 }); + expect(result).toMatchObject({ + allowed: false, + reason: "rate_limited", + retryAfterMs: 1000, + }); + }); + }); + + describe("circuit breaker", () => { + it("blocks requests when circuit is open", () => { + const store = createRedisStore(); + const now = 1_000_000; + const openedUntil = now + 60_000; + + store.set( + "cb:t1", + new Map([ + ["opened_until_ms", openedUntil.toString()], + ["last_probe_ms", now.toString()], + ]), + ); + + const result = runAdmit(store, { now }); + expect(result).toMatchObject({ + allowed: false, + reason: "circuit_open", + effectiveRate: 0, + }); + }); + + it("returns retryAfterMs for open circuit", () => { + const store = createRedisStore(); + const now = 1_000_000; + const openedUntil = now + 30_000; + + store.set( + "cb:t1", + new Map([ + ["opened_until_ms", openedUntil.toString()], + ["last_probe_ms", now.toString()], + ]), + ); + + const result = runAdmit(store, { now }); + expect(result).toMatchObject({ + allowed: false, + reason: "circuit_open", + retryAfterMs: 30_000, + }); + }); + + it("allows probe when probe interval has elapsed", () => { + const store = createRedisStore(); + const now = 1_000_000; + const openedUntil = now + 120_000; + const lastProbe = now - 61_000; + + store.set( + "cb:t1", + new Map([ + ["opened_until_ms", openedUntil.toString()], + ["last_probe_ms", lastProbe.toString()], + ]), + ); + + const result = runAdmit(store, { + now, + cbProbeIntervalMs: 60_000, + }); + expect(result).toEqual({ + allowed: true, + probe: true, + effectiveRate: 0, + }); + + const cbHash = store.get("cb:t1")!; + expect(cbHash.get("last_probe_ms")).toBe(now.toString()); + }); + + it("does not probe when interval has not elapsed", () => { + const store = 
createRedisStore(); + const now = 1_000_000; + const openedUntil = now + 120_000; + const lastProbe = now - 30_000; + + store.set( + "cb:t1", + new Map([ + ["opened_until_ms", openedUntil.toString()], + ["last_probe_ms", lastProbe.toString()], + ]), + ); + + const result = runAdmit(store, { + now, + cbProbeIntervalMs: 60_000, + }); + expect(result).toMatchObject({ + allowed: false, + reason: "circuit_open", + }); + }); + + it("skips circuit breaker when disabled", () => { + const store = createRedisStore(); + const now = 1_000_000; + const openedUntil = now + 60_000; + + store.set( + "cb:t1", + new Map([["opened_until_ms", openedUntil.toString()]]), + ); + + const result = runAdmit(store, { now, cbEnabled: false }); + expect(result).toMatchObject({ allowed: true, probe: false }); + }); + }); + + describe("decay scaling", () => { + it("applies reduced rate during decay period", () => { + const store = createRedisStore(); + const closedAt = 1_000_000; + const decayPeriodMs = 300_000; + const halfwayThrough = closedAt + decayPeriodMs / 2; + + store.set("cb:t1", new Map([["opened_until_ms", closedAt.toString()]])); + + const result = runAdmit(store, { + now: halfwayThrough, + refillPerSec: 10, + decayPeriodMs, + }); + expect(result).toMatchObject({ allowed: true }); + expect((result as { effectiveRate: number }).effectiveRate).toBeCloseTo( + 5, + 0, + ); + }); + + it("uses full rate after decay period ends", () => { + const store = createRedisStore(); + const closedAt = 1_000_000; + const decayPeriodMs = 300_000; + const afterDecay = closedAt + decayPeriodMs + 1; + + store.set("cb:t1", new Map([["opened_until_ms", closedAt.toString()]])); + + const result = runAdmit(store, { + now: afterDecay, + refillPerSec: 10, + decayPeriodMs, + }); + expect(result).toMatchObject({ + allowed: true, + effectiveRate: 10, + }); + }); + + it("clamps minimum effective rate to 0.001", () => { + const store = createRedisStore(); + const closedAt = 1_000_000; + const decayPeriodMs = 
300_000; + const veryEarly = closedAt + 1; + + store.set("cb:t1", new Map([["opened_until_ms", closedAt.toString()]])); + + const result = runAdmit(store, { + now: veryEarly, + refillPerSec: 10, + decayPeriodMs, + }); + const rate = (result as { effectiveRate: number }).effectiveRate; + expect(rate).toBeGreaterThanOrEqual(0.001); + }); + + it("does not decay when decayPeriodMs is 0", () => { + const store = createRedisStore(); + const closedAt = 1_000_000; + + store.set("cb:t1", new Map([["opened_until_ms", closedAt.toString()]])); + + const result = runAdmit(store, { + now: closedAt + 1, + refillPerSec: 10, + decayPeriodMs: 0, + }); + expect(result).toMatchObject({ + allowed: true, + effectiveRate: 10, + }); + }); + + it("does not decay when circuit breaker is disabled", () => { + const store = createRedisStore(); + const closedAt = 1_000_000; + + store.set("cb:t1", new Map([["opened_until_ms", closedAt.toString()]])); + + const result = runAdmit(store, { + now: closedAt + 1, + refillPerSec: 10, + decayPeriodMs: 300_000, + cbEnabled: false, + }); + expect(result).toMatchObject({ + allowed: true, + effectiveRate: 10, + }); + }); + }); + + describe("redis state persistence", () => { + it("persists token count and last_refill_ms", () => { + const store = createRedisStore(); + runAdmit(store, { now: 1_000_000, capacity: 5 }); + + const rlHash = store.get("rl:t1")!; + expect(rlHash.get("tokens")).toBeDefined(); + expect(rlHash.get("last_refill_ms")).toBe("1000000"); + }); + }); +}); diff --git a/lambdas/https-client-lambda/src/__tests__/helpers/fengari.d.ts b/lambdas/https-client-lambda/src/__tests__/helpers/fengari.d.ts new file mode 100644 index 00000000..e40c1e59 --- /dev/null +++ b/lambdas/https-client-lambda/src/__tests__/helpers/fengari.d.ts @@ -0,0 +1,32 @@ +declare module "fengari" { + type LuaState = object; + + const lua: { + LUA_OK: number; + lua_close(L: LuaState): void; + lua_createtable(L: LuaState, narr: number, nrec: number): void; + lua_getglobal(L: 
LuaState, name: Uint8Array): number; + lua_gettop(L: LuaState): number; + lua_pushboolean(L: LuaState, b: number): void; + lua_pushcfunction(L: LuaState, fn: (L: LuaState) => number): void; + lua_pushinteger(L: LuaState, n: number): void; + lua_pushstring(L: LuaState, s: Uint8Array): void; + lua_rawseti(L: LuaState, idx: number, n: number): void; + lua_setglobal(L: LuaState, name: Uint8Array): void; + lua_tostring(L: LuaState, idx: number): Uint8Array; + }; + + const lauxlib: { + luaL_dostring(L: LuaState, s: Uint8Array): number; + luaL_newstate(): LuaState; + }; + + const lualib: { + luaL_openlibs(L: LuaState): void; + }; + + // eslint-disable-next-line @typescript-eslint/naming-convention -- fengari uses snake_case names + function to_jsstring(s: Uint8Array): string; + // eslint-disable-next-line @typescript-eslint/naming-convention -- fengari uses snake_case names + function to_luastring(s: string): Uint8Array; +} diff --git a/lambdas/https-client-lambda/src/__tests__/helpers/lua-redis-mock.ts b/lambdas/https-client-lambda/src/__tests__/helpers/lua-redis-mock.ts new file mode 100644 index 00000000..adee5a1a --- /dev/null +++ b/lambdas/https-client-lambda/src/__tests__/helpers/lua-redis-mock.ts @@ -0,0 +1,144 @@ +import { + lauxlib, + lua, + lualib, + to_jsstring as toJsstring, + to_luastring as toLuastring, +} from "fengari"; + +type LuaState = ReturnType; +type RedisStore = Map>; + +export function createRedisStore(): RedisStore { + return new Map(); +} + +function hget( + store: RedisStore, + key: string, + field: string, +): string | undefined { + return store.get(key)?.get(field); +} + +function hset(store: RedisStore, ...args: string[]): number { + const key = args[0]; + const hash = store.get(key) ?? 
new Map(); + store.set(key, hash); + let fieldsSet = 0; + for (let i = 1; i < args.length; i += 2) { + // eslint-disable-next-line security/detect-object-injection -- args is a controlled array from redis HSET parsing + hash.set(args[i], args[i + 1]); + fieldsSet += 1; + } + return fieldsSet; +} + +function redisCallHandler(L: LuaState, store: RedisStore): number { + const cmd = toJsstring(lua.lua_tostring(L, 1)).toUpperCase(); + if (cmd === "HGET") { + const key = toJsstring(lua.lua_tostring(L, 2)); + const field = toJsstring(lua.lua_tostring(L, 3)); + const val = hget(store, key, field); + if (val === undefined) { + lua.lua_pushboolean(L, 0); + } else { + lua.lua_pushstring(L, toLuastring(val)); + } + return 1; + } + if (cmd === "HSET") { + const nArgs = lua.lua_gettop(L); + const args: string[] = []; + for (let i = 2; i <= nArgs; i++) { + args.push(toJsstring(lua.lua_tostring(L, i))); + } + const count = hset(store, ...args); + lua.lua_pushinteger(L, count); + return 1; + } + throw new Error(`Unsupported Redis command in mock: ${cmd}`); +} + +const CJSON_AND_REDIS_PREAMBLE = ` + cjson = {} + function cjson.encode(t) + if type(t) ~= "table" then return tostring(t) end + local parts = {} + for k, v in pairs(t) do + local key = '"' .. tostring(k) .. '"' + local val + if type(v) == "boolean" then + val = v and "true" or "false" + elseif type(v) == "number" then + if v == math.floor(v) and v < 1e15 and v > -1e15 then + val = string.format("%d", v) + else + val = tostring(v) + end + elseif type(v) == "string" then + val = '"' .. v .. '"' + else + val = tostring(v) + end + parts[#parts + 1] = key .. ":" .. val + end + return "{" .. table.concat(parts, ",") .. "}" + end + + redis = {} + function redis.call(cmd, ...) + return __redis_call(cmd, ...) 
+ end +`; + +function registerRedisCallGlobal(L: LuaState, store: RedisStore): void { + lua.lua_pushcfunction(L, (ls: LuaState) => redisCallHandler(ls, store)); + lua.lua_setglobal(L, toLuastring("__redis_call")); +} + +function installCjsonAndRedisShims(L: LuaState): void { + lauxlib.luaL_dostring(L, toLuastring(CJSON_AND_REDIS_PREAMBLE)); +} + +function setStringArrayGlobal( + L: LuaState, + name: string, + values: string[], +): void { + lua.lua_createtable(L, values.length, 0); + for (const [i, value] of values.entries()) { + lua.lua_pushstring(L, toLuastring(value)); + lua.lua_rawseti(L, -2, i + 1); + } + lua.lua_setglobal(L, toLuastring(name)); +} + +function runScript(L: LuaState, script: string): string { + const status = lauxlib.luaL_dostring(L, toLuastring(script)); + if (status !== lua.LUA_OK) { + const errMsg = toJsstring(lua.lua_tostring(L, -1)); + throw new Error(`Lua error: ${errMsg}`); + } + return toJsstring(lua.lua_tostring(L, -1)); +} + +export function evalLua( + script: string, + keys: string[], + argv: string[], + store: RedisStore, +): unknown { + const L: LuaState = lauxlib.luaL_newstate(); + lualib.luaL_openlibs(L); + + try { + registerRedisCallGlobal(L, store); + installCjsonAndRedisShims(L); + setStringArrayGlobal(L, "KEYS", keys); + setStringArrayGlobal(L, "ARGV", argv); + return JSON.parse(runScript(L, script)); + } finally { + lua.lua_close(L); + } +} diff --git a/lambdas/https-client-lambda/src/__tests__/record-result-lua.test.ts b/lambdas/https-client-lambda/src/__tests__/record-result-lua.test.ts new file mode 100644 index 00000000..2c809559 --- /dev/null +++ b/lambdas/https-client-lambda/src/__tests__/record-result-lua.test.ts @@ -0,0 +1,316 @@ +import recordResultLuaSrc from "services/record-result.lua"; +import { createRedisStore, evalLua } from "__tests__/helpers/lua-redis-mock"; + +type RecordResultArgs = { + now: number; + success: boolean; + cbWindowPeriodMs: number; + cbErrorThreshold: number; + cbMinAttempts: number; + 
cbCooldownMs: number; + decayPeriodMs: number; +}; + +const defaultArgs: RecordResultArgs = { + now: 1_000_000, + success: true, + cbWindowPeriodMs: 60_000, + cbErrorThreshold: 0.5, + cbMinAttempts: 10, + cbCooldownMs: 60_000, + decayPeriodMs: 300_000, +}; + +function runRecordResult( + store: ReturnType, + args: Partial = {}, + targetId = "t1", +) { + const merged = { ...defaultArgs, ...args }; + return evalLua( + recordResultLuaSrc, + [`cb:${targetId}`], + [ + merged.now.toString(), + merged.success ? "1" : "0", + merged.cbWindowPeriodMs.toString(), + merged.cbErrorThreshold.toString(), + merged.cbMinAttempts.toString(), + merged.cbCooldownMs.toString(), + merged.decayPeriodMs.toString(), + ], + store, + ); +} + +describe("record-result.lua", () => { + describe("success recording", () => { + it("returns closed state for a successful result", () => { + const store = createRedisStore(); + const result = runRecordResult(store, { success: true }); + expect(result).toEqual({ ok: true, state: "closed" }); + }); + + it("increments attempt count without incrementing failures", () => { + const store = createRedisStore(); + runRecordResult(store, { success: true }); + + const cbHash = store.get("cb:t1")!; + expect(cbHash.get("cb_attempts")).toBe("1"); + expect(cbHash.get("cb_failures")).toBe("0"); + }); + }); + + describe("failure recording", () => { + it("increments both attempts and failures on error", () => { + const store = createRedisStore(); + runRecordResult(store, { success: false }); + + const cbHash = store.get("cb:t1")!; + expect(cbHash.get("cb_attempts")).toBe("1"); + expect(cbHash.get("cb_failures")).toBe("1"); + }); + + it("stays closed when below error threshold", () => { + const store = createRedisStore(); + const now = 1_000_000; + + for (let i = 0; i < 8; i++) { + runRecordResult(store, { now, success: true }); + } + for (let i = 0; i < 2; i++) { + runRecordResult(store, { now, success: false }); + } + + const result = runRecordResult(store, { now, 
success: true }); + expect(result).toEqual({ ok: true, state: "closed" }); + }); + }); + + describe("circuit opening", () => { + it("opens circuit when error rate exceeds threshold", () => { + const store = createRedisStore(); + const now = 1_000_000; + + for (let i = 0; i < 10; i++) { + runRecordResult(store, { + now, + success: false, + cbMinAttempts: 5, + cbErrorThreshold: 0.5, + }); + } + + const result = runRecordResult(store, { + now, + success: false, + cbMinAttempts: 5, + cbErrorThreshold: 0.5, + }); + expect(result).toEqual({ ok: false, state: "opened" }); + }); + + it("does not open circuit when below minimum attempts", () => { + const store = createRedisStore(); + const now = 1_000_000; + + for (let i = 0; i < 4; i++) { + runRecordResult(store, { + now, + success: false, + cbMinAttempts: 10, + }); + } + + const result = runRecordResult(store, { + now, + success: false, + cbMinAttempts: 10, + }); + expect(result).toEqual({ ok: true, state: "closed" }); + }); + + it("sets opened_until_ms with cooldown on open", () => { + const store = createRedisStore(); + const now = 1_000_000; + const cbCooldownMs = 30_000; + + for (let i = 0; i < 10; i++) { + runRecordResult(store, { + now, + success: false, + cbMinAttempts: 5, + cbErrorThreshold: 0.5, + cbCooldownMs, + }); + } + + const cbHash = store.get("cb:t1")!; + expect(Number(cbHash.get("opened_until_ms"))).toBe(now + cbCooldownMs); + }); + }); + + describe("window rolling", () => { + it("rolls window when period expires", () => { + const store = createRedisStore(); + const windowPeriodMs = 60_000; + const t0 = 1_000_000; + const t1 = t0 + windowPeriodMs + 1; + + for (let i = 0; i < 3; i++) { + runRecordResult(store, { + now: t0, + success: false, + cbWindowPeriodMs: windowPeriodMs, + }); + } + + runRecordResult(store, { + now: t1, + success: true, + cbWindowPeriodMs: windowPeriodMs, + }); + + const cbHash = store.get("cb:t1")!; + expect(cbHash.get("cb_prev_failures")).toBe("3"); + 
expect(cbHash.get("cb_prev_attempts")).toBe("3"); + expect(cbHash.get("cb_attempts")).toBe("1"); + expect(cbHash.get("cb_failures")).toBe("0"); + }); + + it("initialises window_from on first call", () => { + const store = createRedisStore(); + const now = 1_000_000; + + runRecordResult(store, { now }); + + const cbHash = store.get("cb:t1")!; + expect(cbHash.get("cb_window_from")).toBe(now.toString()); + }); + }); + + describe("two-window blended rate", () => { + it("blends previous window failures into current assessment", () => { + const store = createRedisStore(); + const windowPeriodMs = 60_000; + const t0 = 1_000_000; + + for (let i = 0; i < 10; i++) { + runRecordResult(store, { + now: t0, + success: false, + cbWindowPeriodMs: windowPeriodMs, + cbMinAttempts: 5, + cbErrorThreshold: 0.5, + }); + } + + const justAfterRoll = t0 + windowPeriodMs + 1; + const result = runRecordResult(store, { + now: justAfterRoll, + success: false, + cbWindowPeriodMs: windowPeriodMs, + cbMinAttempts: 5, + cbErrorThreshold: 0.5, + }); + + expect(result).toEqual({ ok: false, state: "opened" }); + }); + + it("reduces previous window weight as current window progresses", () => { + const store = createRedisStore(); + const windowPeriodMs = 100_000; + const t0 = 1_000_000; + + for (let i = 0; i < 10; i++) { + runRecordResult(store, { + now: t0, + success: false, + cbWindowPeriodMs: windowPeriodMs, + cbMinAttempts: 5, + cbErrorThreshold: 0.5, + }); + } + + const nearEndOfWindow = t0 + windowPeriodMs + windowPeriodMs - 1; + for (let i = 0; i < 20; i++) { + runRecordResult(store, { + now: nearEndOfWindow, + success: true, + cbWindowPeriodMs: windowPeriodMs, + cbMinAttempts: 5, + cbErrorThreshold: 0.5, + }); + } + + const result = runRecordResult(store, { + now: nearEndOfWindow, + success: true, + cbWindowPeriodMs: windowPeriodMs, + cbMinAttempts: 5, + cbErrorThreshold: 0.5, + }); + expect(result).toEqual({ ok: true, state: "closed" }); + }); + }); + + describe("decay period", () => { + 
it("clears opened_until_ms after decay period elapses", () => { + const store = createRedisStore(); + const openedAt = 1_000_000; + const cooldownMs = 60_000; + const decayPeriodMs = 300_000; + const openedUntil = openedAt + cooldownMs; + const afterDecay = openedUntil + decayPeriodMs + 1; + + store.set( + "cb:t1", + new Map([["opened_until_ms", openedUntil.toString()]]), + ); + + runRecordResult(store, { + now: afterDecay, + success: true, + decayPeriodMs, + }); + + const cbHash = store.get("cb:t1")!; + expect(cbHash.get("opened_until_ms")).toBe("0"); + }); + + it("preserves opened_until_ms during active decay", () => { + const store = createRedisStore(); + const openedUntil = 1_060_000; + const duringDecay = openedUntil + 100_000; + + store.set( + "cb:t1", + new Map([["opened_until_ms", openedUntil.toString()]]), + ); + + runRecordResult(store, { + now: duringDecay, + success: true, + decayPeriodMs: 300_000, + }); + + const cbHash = store.get("cb:t1")!; + expect(Number(cbHash.get("opened_until_ms"))).toBe(openedUntil); + }); + }); + + describe("state persistence", () => { + it("writes all counter fields to redis", () => { + const store = createRedisStore(); + runRecordResult(store); + + const cbHash = store.get("cb:t1")!; + expect(cbHash.has("opened_until_ms")).toBe(true); + expect(cbHash.has("cb_window_from")).toBe(true); + expect(cbHash.has("cb_failures")).toBe(true); + expect(cbHash.has("cb_attempts")).toBe(true); + expect(cbHash.has("cb_prev_failures")).toBe(true); + expect(cbHash.has("cb_prev_attempts")).toBe(true); + }); + }); +}); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 0aeb10bd..2699d50c 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -398,6 +398,9 @@ importers: eslint: specifier: catalog:lint version: 9.39.4(jiti@2.6.1) + fengari: + specifier: ^0.1.5 + version: 0.1.5 jest: specifier: catalog:test version: 30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)) @@ -3130,6 +3133,9 @@ packages: picomatch: optional: 
true + fengari@0.1.5: + resolution: {integrity: sha512-0DS4Nn4rV8qyFlQCpKK8brT61EUtswynrpfFTcgLErcilBIBskSMQ86fO2WVuybr14ywyKdRjv91FiRZwnEuvQ==} + fflate@0.8.1: resolution: {integrity: sha512-/exOvEuc+/iaUm105QIiOt4LpBdMTWsXxqR0HDF35vx3fmaKzw7354gTilCh5rkzEt8WYyG//ku3h3nRmd7CHQ==} @@ -4070,6 +4076,10 @@ packages: react-is@19.0.0: resolution: {integrity: sha512-H91OHcwjZsbq3ClIDHMzBShc1rotbfACdWENsmEf0IFvZ3FgGPtdHMcsv45bQ1hAbgdfiA8SnxTKfDS+x/8m2g==} + readline-sync@1.4.10: + resolution: {integrity: sha512-gNva8/6UAe8QYepIQH/jQ2qn91Qj0B9sYjMBBs3QOB8F2CXcKgLxQaJRP76sWVRQt+QU+8fAkCbCvjjMFu7Ycw==} + engines: {node: '>= 0.8.0'} + real-require@0.2.0: resolution: {integrity: sha512-57frrGM/OCTLqLOAh0mhVA9VBMHd+9U7Zb2THMGdBUoZVOtGbJzjxsYGDJ3A9AYYCP4hn6y1TVbaOfzWtm5GFg==} engines: {node: '>= 12.13.0'} @@ -4242,6 +4252,9 @@ packages: sprintf-js@1.0.3: resolution: {integrity: sha512-D9cPgkvLlV3t3IzL0D0YLvGA9Ahk4PcvVwUbN0dSGr1aP0Nrt4AEnTUbuGvquEC0mA64Gqt1fzirlRs5ibXx8g==} + sprintf-js@1.1.3: + resolution: {integrity: sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA==} + stable-hash-x@0.2.0: resolution: {integrity: sha512-o3yWv49B/o4QZk5ZcsALc6t0+eCelPc44zZsLtCQnZPDwFpDYSWcDnrv2TtMmMbQ7uKo3J0HTURCqckw23czNQ==} engines: {node: '>=12.0.0'} @@ -4360,6 +4373,10 @@ packages: resolution: {integrity: sha512-j2Zq4NyQYG5XMST4cbs02Ak8iJUdxRM0XI5QyxXuZOzKOINmWurp3smXu3y5wDcJrptwpSjgXHzIQxR0omXljQ==} engines: {node: '>=12.0.0'} + tmp@0.2.5: + resolution: {integrity: sha512-voyz6MApa1rQGUxT3E+BK7/ROe8itEx7vD8/HEvt4xwXucvQ5G5oeEiHkmHZJuBO21RpOf+YYm9MOivj709jow==} + engines: {node: '>=14.14'} + tmpl@1.0.5: resolution: {integrity: sha512-3f0uOEAQwIqGuWW2MVzYg8fV/QNnc/IpuJNG837rLuczAaLVHslWHZQj4IGiEl5Hs3kkbhwL9Ab7Hrsmuj+Smw==} @@ -7945,6 +7962,12 @@ snapshots: optionalDependencies: picomatch: 4.0.4 + fengari@0.1.5: + dependencies: + readline-sync: 1.4.10 + sprintf-js: 1.1.3 + tmp: 0.2.5 + fflate@0.8.1: {} file-entry-cache@8.0.0: @@ -9217,6 +9240,8 @@ 
snapshots: react-is@19.0.0: {} + readline-sync@1.4.10: {} + real-require@0.2.0: {} refa@0.12.1: @@ -9409,6 +9434,8 @@ snapshots: sprintf-js@1.0.3: {} + sprintf-js@1.1.3: {} + stable-hash-x@0.2.0: {} stack-utils@2.0.6: @@ -9548,6 +9575,8 @@ snapshots: fdir: 6.5.0(picomatch@4.0.4) picomatch: 4.0.4 + tmp@0.2.5: {} + tmpl@1.0.5: {} to-regex-range@5.0.1: From 0bbcd7971e91c60a25b3227b14c7a2a2817d3c34 Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Fri, 17 Apr 2026 15:56:02 +0100 Subject: [PATCH 15/65] Add luacheck to pre-commit and fix issue --- .luarc.json | 5 +- scripts/config/pre-commit.yaml | 7 ++ scripts/githooks/check-lua-format.sh | 100 +++++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 1 deletion(-) create mode 100755 scripts/githooks/check-lua-format.sh diff --git a/.luarc.json b/.luarc.json index 25564465..09ef6a0c 100644 --- a/.luarc.json +++ b/.luarc.json @@ -3,7 +3,10 @@ "globals": [ "KEYS", "ARGV", - "redis" + "redis", + "cjson", + "cmsgpack", + "bit" ] } } diff --git a/scripts/config/pre-commit.yaml b/scripts/config/pre-commit.yaml index 1e1da873..221b38f1 100644 --- a/scripts/config/pre-commit.yaml +++ b/scripts/config/pre-commit.yaml @@ -79,3 +79,10 @@ repos: entry: pnpm exec knip --no-progress language: system pass_filenames: false + - repo: local + hooks: + - id: check-lua-format + name: Check Lua format + entry: /usr/bin/env check=branch ./scripts/githooks/check-lua-format.sh + language: script + pass_filenames: false diff --git a/scripts/githooks/check-lua-format.sh b/scripts/githooks/check-lua-format.sh new file mode 100755 index 00000000..ec8fdf75 --- /dev/null +++ b/scripts/githooks/check-lua-format.sh @@ -0,0 +1,100 @@ +#!/bin/bash + +set -euo pipefail + +# Pre-commit git hook to lint Lua files using luacheck. Runs luacheck natively +# if installed, otherwise falls back to Docker. 
+# +# Usage: +# $ [options] ./check-lua-format.sh +# +# Options: +# check={all,staged-changes,working-tree-changes,branch} # Check mode, default is 'working-tree-changes' +# BRANCH_NAME=other-branch-than-main # Branch to compare with, default is `origin/main` +# FORCE_USE_DOCKER=true # If set to true the command is run in a Docker container, default is 'false' +# VERBOSE=true # Show all the executed commands, default is `false` + +# ============================================================================== + +function main() { + + cd "$(git rev-parse --show-toplevel)" + + check=${check:-working-tree-changes} + case $check in + "all") + files="$(git ls-files "*.lua")" + ;; + "staged-changes") + files="$(git diff --diff-filter=ACMRT --name-only --cached "*.lua")" + ;; + "working-tree-changes") + files="$(git diff --diff-filter=ACMRT --name-only "*.lua")" + ;; + "branch") + files="$( (git diff --diff-filter=ACMRT --name-only "${BRANCH_NAME:-origin/main}" "*.lua"; git diff --name-only "*.lua") | sort | uniq )" + ;; + *) + echo "Unrecognised check mode: $check" >&2 && exit 1 + ;; + esac + + if [ -n "$files" ]; then + # shellcheck disable=SC2155 + local globals=$(jq -r '.diagnostics.globals[]' .luarc.json | tr '\n' ' ') + if command -v luacheck > /dev/null 2>&1 && ! is-arg-true "${FORCE_USE_DOCKER:-false}"; then + files="$files" globals="$globals" run-luacheck-natively + else + files="$files" globals="$globals" run-luacheck-in-docker + fi + fi +} + +# Run luacheck natively. +# Arguments (provided as environment variables): +# files=[files to check] +# globals=[space-separated list of global names] +function run-luacheck-natively() { + + # shellcheck disable=SC2086 + luacheck $files --globals $globals +} + +# Run luacheck in a Docker container. 
+# Arguments (provided as environment variables): +# files=[files to check] +# globals=[space-separated list of global names] +function run-luacheck-in-docker() { + + # shellcheck disable=SC1091 + source ./scripts/docker/docker.lib.sh + + # shellcheck disable=SC2155 + local image=$(name=pipelinecomponents/luacheck docker-get-image-version-and-pull) + # shellcheck disable=SC2086 + docker run --rm --platform linux/amd64 \ + --volume "$PWD":/data \ + --workdir /data \ + --entrypoint luacheck \ + "$image" \ + $files --globals $globals +} + +# ============================================================================== + +function is-arg-true() { + + if [[ "$1" =~ ^(true|yes|y|on|1|TRUE|YES|Y|ON)$ ]]; then + return 0 + else + return 1 + fi +} + +# ============================================================================== + +is-arg-true "${VERBOSE:-false}" && set -x + +main "$@" + +exit 0 From b7297d7acd6b50c9653e149707effd30396809ef Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Fri, 17 Apr 2026 16:06:48 +0100 Subject: [PATCH 16/65] luacheck in CI workflow --- .github/workflows/stage-2-test.yaml | 10 ++++++++++ scripts/tests/lua-lint.sh | 7 +++++++ scripts/tests/test.mk | 3 +++ 3 files changed, 20 insertions(+) create mode 100755 scripts/tests/lua-lint.sh diff --git a/.github/workflows/stage-2-test.yaml b/.github/workflows/stage-2-test.yaml index b28d6227..c0ba1c8d 100644 --- a/.github/workflows/stage-2-test.yaml +++ b/.github/workflows/stage-2-test.yaml @@ -125,6 +125,16 @@ jobs: - name: "Run linting" run: | make test-lint + test-lua-lint: + name: "Lua linting" + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - name: "Checkout code" + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + - name: "Run luacheck" + run: | + make test-lua-lint test-typecheck: name: "Typecheck" runs-on: ubuntu-latest diff --git a/scripts/tests/lua-lint.sh b/scripts/tests/lua-lint.sh new file mode 100755 index 00000000..ae271da4 --- /dev/null +++ 
b/scripts/tests/lua-lint.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +set -euo pipefail + +cd "$(git rev-parse --show-toplevel)" + +check=all ./scripts/githooks/check-lua-format.sh diff --git a/scripts/tests/test.mk b/scripts/tests/test.mk index d9303d92..a94a5af0 100644 --- a/scripts/tests/test.mk +++ b/scripts/tests/test.mk @@ -14,6 +14,9 @@ test-unit: # Run your unit tests from scripts/test/unit @Testing test-lint: # Lint your code from scripts/test/lint @Testing make _test name="lint" +test-lua-lint: # Lint Lua scripts @Testing + make _test name="lua-lint" + test-typecheck: # Typecheck your code from scripts/test/typecheck @Testing make _test name="typecheck" From e4cd754956bd918bfe76311c9c7cd699462c9a1c Mon Sep 17 00:00:00 2001 From: Rhys Cox Date: Mon, 20 Apr 2026 12:18:26 +0100 Subject: [PATCH 17/65] CCM-16073 - PR feedback --- .../src/__tests__/admit-lua.test.ts | 345 ++++++++++++------ .../src/__tests__/helpers/lua-redis-mock.ts | 104 +++--- .../src/__tests__/record-result-lua.test.ts | 249 ++++++++----- 3 files changed, 442 insertions(+), 256 deletions(-) diff --git a/lambdas/https-client-lambda/src/__tests__/admit-lua.test.ts b/lambdas/https-client-lambda/src/__tests__/admit-lua.test.ts index bf64f5e0..6aab4727 100644 --- a/lambdas/https-client-lambda/src/__tests__/admit-lua.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/admit-lua.test.ts @@ -1,71 +1,88 @@ import admitLuaSrc from "services/admit.lua"; import { createRedisStore, evalLua } from "__tests__/helpers/lua-redis-mock"; +// ARGV: [now, capacity, refillPerSec, cooldownMs, decayPeriodMs, cbWindowPeriodMs, cbProbeIntervalMs] +// KEYS: [cbKey, rlKey] +// Returns: [allowed (0|1), reason, retryAfterMs, effectiveRate] + type AdmitArgs = { now: number; - refillPerSec: number; capacity: number; - cbProbeIntervalMs: number; - cbEnabled: boolean; + refillPerSec: number; + cooldownMs: number; decayPeriodMs: number; + cbWindowPeriodMs: number; + cbProbeIntervalMs: number; }; const defaultArgs: AdmitArgs = { 
now: 1_000_000, - refillPerSec: 10, capacity: 10, - cbProbeIntervalMs: 60_000, - cbEnabled: true, + refillPerSec: 10, + cooldownMs: 60_000, decayPeriodMs: 300_000, + cbWindowPeriodMs: 60_000, + cbProbeIntervalMs: 60_000, +}; + +type AdmitResult = { + allowed: number; + reason: string; + retryAfterMs: number; + effectiveRate: number; }; function runAdmit( store: ReturnType, args: Partial = {}, targetId = "t1", -) { +): AdmitResult { const merged = { ...defaultArgs, ...args }; - return evalLua( + const raw = evalLua( admitLuaSrc, - [`rl:${targetId}`, `cb:${targetId}`], + [`cb:${targetId}`, `rl:${targetId}`], [ merged.now.toString(), - merged.refillPerSec.toString(), merged.capacity.toString(), - merged.cbProbeIntervalMs.toString(), - merged.cbEnabled ? "1" : "0", + merged.refillPerSec.toString(), + merged.cooldownMs.toString(), merged.decayPeriodMs.toString(), + merged.cbWindowPeriodMs.toString(), + merged.cbProbeIntervalMs.toString(), ], store, - ); + ) as [number, string, number, number]; + return { + allowed: raw[0], + reason: raw[1], + retryAfterMs: raw[2], + effectiveRate: raw[3], + }; } describe("admit.lua", () => { describe("rate limiting", () => { it("allows the first request with full token bucket", () => { const store = createRedisStore(); - const result = runAdmit(store); + const { allowed, effectiveRate, reason, retryAfterMs } = runAdmit(store); - expect(result).toEqual({ - allowed: true, - probe: false, - effectiveRate: 10, - }); + expect(allowed).toBe(1); + expect(reason).toBe("allowed"); + expect(retryAfterMs).toBe(0); + expect(effectiveRate).toBe(10); }); - it("deducts tokens on each call", () => { + it("depletes tokens on consecutive calls and rejects when empty", () => { const store = createRedisStore(); for (let i = 0; i < 10; i++) { - const result = runAdmit(store); - expect(result).toMatchObject({ allowed: true }); + const { allowed } = runAdmit(store); + expect(allowed).toBe(1); } - const result = runAdmit(store); - 
expect(result).toMatchObject({ - allowed: false, - reason: "rate_limited", - }); + const { allowed, reason } = runAdmit(store); + expect(allowed).toBe(0); + expect(reason).toBe("rate_limited"); }); it("returns retryAfterMs when rate limited", () => { @@ -75,15 +92,19 @@ describe("admit.lua", () => { runAdmit(store); } - const result = runAdmit(store); - expect(result).toMatchObject({ - allowed: false, - reason: "rate_limited", - retryAfterMs: expect.any(Number), - }); - expect((result as { retryAfterMs: number }).retryAfterMs).toBeGreaterThan( - 0, - ); + const { retryAfterMs } = runAdmit(store); + expect(retryAfterMs).toBe(1000); + }); + + it("reports effective rate when rate limited", () => { + const store = createRedisStore(); + + for (let i = 0; i < 10; i++) { + runAdmit(store); + } + + const { effectiveRate } = runAdmit(store); + expect(effectiveRate).toBe(10); }); it("refills tokens over time", () => { @@ -95,28 +116,24 @@ describe("admit.lua", () => { } const denied = runAdmit(store, { now }); - expect(denied).toMatchObject({ allowed: false }); + expect(denied.allowed).toBe(0); - const later = now + 1000; - const result = runAdmit(store, { now: later }); - expect(result).toMatchObject({ allowed: true }); + const refilled = runAdmit(store, { now: now + 1000 }); + expect(refilled.allowed).toBe(1); }); it("caps tokens at capacity", () => { const store = createRedisStore(); + const now = 1_000_000; - const result = runAdmit(store, { - now: 1_000_000, - capacity: 5, - refillPerSec: 100, - }); - expect(result).toMatchObject({ allowed: true }); + runAdmit(store, { now, capacity: 5, refillPerSec: 100 }); + + // Advance 10 seconds — would add 1000 tokens without cap + runAdmit(store, { now: now + 10_000, capacity: 5, refillPerSec: 100 }); - const rlHash = store.get("rl:t1"); - const tokensRaw = rlHash?.get("tokens"); - expect(tokensRaw).toBeDefined(); - const tokens = Number.parseFloat(tokensRaw ?? 
""); - expect(tokens).toBeLessThanOrEqual(4); + const rlHash = store.get("rl:t1")!; + // Refill capped to capacity (5), then one consumed → 4 + expect(Number(rlHash.get("tokens"))).toBe(4); }); it("handles zero refill rate", () => { @@ -126,17 +143,17 @@ describe("admit.lua", () => { runAdmit(store, { refillPerSec: 0 }); } - const result = runAdmit(store, { refillPerSec: 0 }); - expect(result).toMatchObject({ - allowed: false, - reason: "rate_limited", - retryAfterMs: 1000, + const { allowed, reason, retryAfterMs } = runAdmit(store, { + refillPerSec: 0, }); + expect(allowed).toBe(0); + expect(reason).toBe("rate_limited"); + expect(retryAfterMs).toBe(1000); }); }); describe("circuit breaker", () => { - it("blocks requests when circuit is open", () => { + it("rejects when circuit is open", () => { const store = createRedisStore(); const now = 1_000_000; const openedUntil = now + 60_000; @@ -149,12 +166,10 @@ describe("admit.lua", () => { ]), ); - const result = runAdmit(store, { now }); - expect(result).toMatchObject({ - allowed: false, - reason: "circuit_open", - effectiveRate: 0, - }); + const { allowed, effectiveRate, reason } = runAdmit(store, { now }); + expect(allowed).toBe(0); + expect(reason).toBe("circuit_open"); + expect(effectiveRate).toBe(0); }); it("returns retryAfterMs for open circuit", () => { @@ -170,12 +185,8 @@ describe("admit.lua", () => { ]), ); - const result = runAdmit(store, { now }); - expect(result).toMatchObject({ - allowed: false, - reason: "circuit_open", - retryAfterMs: 30_000, - }); + const { retryAfterMs } = runAdmit(store, { now }); + expect(retryAfterMs).toBe(30_000); }); it("allows probe when probe interval has elapsed", () => { @@ -192,15 +203,31 @@ describe("admit.lua", () => { ]), ); - const result = runAdmit(store, { + const { allowed, effectiveRate, reason, retryAfterMs } = runAdmit(store, { now, cbProbeIntervalMs: 60_000, }); - expect(result).toEqual({ - allowed: true, - probe: true, - effectiveRate: 0, - }); + 
expect(allowed).toBe(1); + expect(reason).toBe("probe"); + expect(retryAfterMs).toBe(0); + expect(effectiveRate).toBe(0); + }); + + it("updates last_probe_ms after allowing a probe", () => { + const store = createRedisStore(); + const now = 1_000_000; + const openedUntil = now + 120_000; + const lastProbe = now - 61_000; + + store.set( + "cb:t1", + new Map([ + ["opened_until_ms", openedUntil.toString()], + ["last_probe_ms", lastProbe.toString()], + ]), + ); + + runAdmit(store, { now, cbProbeIntervalMs: 60_000 }); const cbHash = store.get("cb:t1")!; expect(cbHash.get("last_probe_ms")).toBe(now.toString()); @@ -220,28 +247,99 @@ describe("admit.lua", () => { ]), ); - const result = runAdmit(store, { + const { allowed, reason } = runAdmit(store, { now, cbProbeIntervalMs: 60_000, }); - expect(result).toMatchObject({ - allowed: false, - reason: "circuit_open", + expect(allowed).toBe(0); + expect(reason).toBe("circuit_open"); + }); + + it("does not probe when cbProbeIntervalMs is 0", () => { + const store = createRedisStore(); + const now = 1_000_000; + const openedUntil = now + 120_000; + + store.set( + "cb:t1", + new Map([ + ["opened_until_ms", openedUntil.toString()], + ["last_probe_ms", "0"], + ]), + ); + + const { allowed, reason } = runAdmit(store, { + now, + cbProbeIntervalMs: 0, }); + expect(allowed).toBe(0); + expect(reason).toBe("circuit_open"); }); + }); - it("skips circuit breaker when disabled", () => { + describe("sliding window", () => { + it("initialises cbWindowFrom on first call", () => { const store = createRedisStore(); const now = 1_000_000; - const openedUntil = now + 60_000; + + runAdmit(store, { now }); + + const cbHash = store.get("cb:t1")!; + expect(cbHash.get("cb_window_from")).toBe(now.toString()); + }); + + it("rolls current window to previous when period expires", () => { + const store = createRedisStore(); + const cbWindowPeriodMs = 60_000; + const t0 = 1_000_000; + const t1 = t0 + cbWindowPeriodMs + 1; store.set( "cb:t1", - new 
Map([["opened_until_ms", openedUntil.toString()]]), + new Map([ + ["cb_window_from", t0.toString()], + ["cb_failures", "5"], + ["cb_attempts", "10"], + ["cb_prev_failures", "0"], + ["cb_prev_attempts", "0"], + ]), ); - const result = runAdmit(store, { now, cbEnabled: false }); - expect(result).toMatchObject({ allowed: true, probe: false }); + runAdmit(store, { now: t1, cbWindowPeriodMs }); + + const cbHash = store.get("cb:t1")!; + expect(cbHash.get("cb_prev_failures")).toBe("5"); + expect(cbHash.get("cb_prev_attempts")).toBe("10"); + expect(cbHash.get("cb_failures")).toBe("0"); + expect(cbHash.get("cb_attempts")).toBe("0"); + expect(cbHash.get("cb_window_from")).toBe(t1.toString()); + }); + + it("clears both windows when gap exceeds two periods", () => { + const store = createRedisStore(); + const cbWindowPeriodMs = 60_000; + const t0 = 1_000_000; + const t1 = t0 + 2 * cbWindowPeriodMs + 1; + + store.set( + "cb:t1", + new Map([ + ["cb_window_from", t0.toString()], + ["cb_failures", "5"], + ["cb_attempts", "10"], + ["cb_prev_failures", "3"], + ["cb_prev_attempts", "7"], + ]), + ); + + runAdmit(store, { now: t1, cbWindowPeriodMs }); + + const cbHash = store.get("cb:t1")!; + expect(cbHash.get("cb_prev_failures")).toBe("0"); + expect(cbHash.get("cb_prev_attempts")).toBe("0"); + expect(cbHash.get("cb_failures")).toBe("0"); + expect(cbHash.get("cb_attempts")).toBe("0"); + expect(cbHash.get("cb_window_from")).toBe(t1.toString()); }); }); @@ -254,16 +352,12 @@ describe("admit.lua", () => { store.set("cb:t1", new Map([["opened_until_ms", closedAt.toString()]])); - const result = runAdmit(store, { + const { effectiveRate } = runAdmit(store, { now: halfwayThrough, refillPerSec: 10, decayPeriodMs, }); - expect(result).toMatchObject({ allowed: true }); - expect((result as { effectiveRate: number }).effectiveRate).toBeCloseTo( - 5, - 0, - ); + expect(effectiveRate).toBe(5); }); it("uses full rate after decay period ends", () => { @@ -274,18 +368,16 @@ describe("admit.lua", () => 
{ store.set("cb:t1", new Map([["opened_until_ms", closedAt.toString()]])); - const result = runAdmit(store, { + const { allowed, effectiveRate } = runAdmit(store, { now: afterDecay, refillPerSec: 10, decayPeriodMs, }); - expect(result).toMatchObject({ - allowed: true, - effectiveRate: 10, - }); + expect(allowed).toBe(1); + expect(effectiveRate).toBe(10); }); - it("clamps minimum effective rate to 0.001", () => { + it("clamps minimum effective rate to 1", () => { const store = createRedisStore(); const closedAt = 1_000_000; const decayPeriodMs = 300_000; @@ -293,52 +385,45 @@ describe("admit.lua", () => { store.set("cb:t1", new Map([["opened_until_ms", closedAt.toString()]])); - const result = runAdmit(store, { + const { effectiveRate } = runAdmit(store, { now: veryEarly, refillPerSec: 10, decayPeriodMs, }); - const rate = (result as { effectiveRate: number }).effectiveRate; - expect(rate).toBeGreaterThanOrEqual(0.001); + expect(effectiveRate).toBeGreaterThanOrEqual(1); }); - it("does not decay when decayPeriodMs is 0", () => { + it("clears openedUntil when decay period fully elapses", () => { const store = createRedisStore(); const closedAt = 1_000_000; + const decayPeriodMs = 300_000; + const afterDecay = closedAt + decayPeriodMs + 1; store.set("cb:t1", new Map([["opened_until_ms", closedAt.toString()]])); - const result = runAdmit(store, { - now: closedAt + 1, - refillPerSec: 10, - decayPeriodMs: 0, - }); - expect(result).toMatchObject({ - allowed: true, - effectiveRate: 10, - }); + runAdmit(store, { now: afterDecay, decayPeriodMs }); + + const cbHash = store.get("cb:t1")!; + expect(cbHash.get("opened_until_ms")).toBe("0"); }); - it("does not decay when circuit breaker is disabled", () => { + it("does not decay when decayPeriodMs is 0", () => { const store = createRedisStore(); const closedAt = 1_000_000; store.set("cb:t1", new Map([["opened_until_ms", closedAt.toString()]])); - const result = runAdmit(store, { + const { allowed, effectiveRate } = runAdmit(store, 
{ now: closedAt + 1, refillPerSec: 10, - decayPeriodMs: 300_000, - cbEnabled: false, - }); - expect(result).toMatchObject({ - allowed: true, - effectiveRate: 10, + decayPeriodMs: 0, }); + expect(allowed).toBe(1); + expect(effectiveRate).toBe(10); }); }); - describe("redis state persistence", () => { + describe("state persistence", () => { it("persists token count and last_refill_ms", () => { const store = createRedisStore(); runAdmit(store, { now: 1_000_000, capacity: 5 }); @@ -347,5 +432,29 @@ describe("admit.lua", () => { expect(rlHash.get("tokens")).toBeDefined(); expect(rlHash.get("last_refill_ms")).toBe("1000000"); }); + + it("persists circuit breaker fields", () => { + const store = createRedisStore(); + runAdmit(store, { now: 1_000_000 }); + + const cbHash = store.get("cb:t1")!; + expect(cbHash.has("opened_until_ms")).toBe(true); + expect(cbHash.has("cb_window_from")).toBe(true); + expect(cbHash.has("cb_failures")).toBe(true); + expect(cbHash.has("cb_attempts")).toBe(true); + expect(cbHash.has("cb_prev_failures")).toBe(true); + expect(cbHash.has("cb_prev_attempts")).toBe(true); + }); + + it("isolates state between targets", () => { + const store = createRedisStore(); + runAdmit(store, {}, "target-a"); + runAdmit(store, {}, "target-b"); + + expect(store.has("cb:target-a")).toBe(true); + expect(store.has("cb:target-b")).toBe(true); + expect(store.has("rl:target-a")).toBe(true); + expect(store.has("rl:target-b")).toBe(true); + }); }); }); diff --git a/lambdas/https-client-lambda/src/__tests__/helpers/lua-redis-mock.ts b/lambdas/https-client-lambda/src/__tests__/helpers/lua-redis-mock.ts index adee5a1a..f6d11d50 100644 --- a/lambdas/https-client-lambda/src/__tests__/helpers/lua-redis-mock.ts +++ b/lambdas/https-client-lambda/src/__tests__/helpers/lua-redis-mock.ts @@ -13,22 +13,13 @@ export function createRedisStore(): RedisStore { return new Map(); } -function hget( - store: RedisStore, - key: string, - field: string, -): string | undefined { - return 
store.get(key)?.get(field); -} - -function hset(store: RedisStore, ...args: string[]): number { - const key = args[0]; +function hset(store: RedisStore, key: string, pairs: string[]): number { const hash = store.get(key) ?? new Map(); store.set(key, hash); let fieldsSet = 0; - for (let i = 1; i < args.length; i += 2) { - // eslint-disable-next-line security/detect-object-injection -- args is a controlled array from redis HSET parsing - hash.set(args[i], args[i + 1]); + for (let i = 0; i < pairs.length; i += 2) { + // eslint-disable-next-line security/detect-object-injection -- pairs is a controlled array from redis HSET parsing + hash.set(pairs[i], pairs[i + 1]); fieldsSet += 1; } return fieldsSet; @@ -36,52 +27,80 @@ function hset(store: RedisStore, ...args: string[]): number { function redisCallHandler(L: LuaState, store: RedisStore): number { const cmd = toJsstring(lua.lua_tostring(L, 1)).toUpperCase(); - if (cmd === "HGET") { + + if (cmd === "HMGET") { const key = toJsstring(lua.lua_tostring(L, 2)); - const field = toJsstring(lua.lua_tostring(L, 3)); - const val = hget(store, key, field); - if (val === undefined) { - lua.lua_pushboolean(L, 0); - } else { - lua.lua_pushstring(L, toLuastring(val)); + const nArgs = lua.lua_gettop(L); + const hash = store.get(key); + lua.lua_createtable(L, nArgs - 2, 0); + for (let i = 3; i <= nArgs; i++) { + const field = toJsstring(lua.lua_tostring(L, i)); + const val = hash?.get(field); + if (val === undefined) { + lua.lua_pushboolean(L, 0); + } else { + lua.lua_pushstring(L, toLuastring(val)); + } + lua.lua_rawseti(L, -2, i - 2); } return 1; } + if (cmd === "HSET") { + const key = toJsstring(lua.lua_tostring(L, 2)); const nArgs = lua.lua_gettop(L); - const args: string[] = []; - for (let i = 2; i <= nArgs; i++) { - args.push(toJsstring(lua.lua_tostring(L, i))); + const pairs: string[] = []; + for (let i = 3; i <= nArgs; i++) { + pairs.push(toJsstring(lua.lua_tostring(L, i))); } - const count = hset(store, ...args); + const 
count = hset(store, key, pairs); lua.lua_pushinteger(L, count); return 1; } + + if (cmd === "EXPIRE") { + lua.lua_pushinteger(L, 1); + return 1; + } + throw new Error(`Unsupported Redis command in mock: ${cmd}`); } const CJSON_AND_REDIS_PREAMBLE = ` cjson = {} function cjson.encode(t) - if type(t) ~= "table" then return tostring(t) end - local parts = {} - for k, v in pairs(t) do - local key = '"' .. tostring(k) .. '"' - local val - if type(v) == "boolean" then - val = v and "true" or "false" - elseif type(v) == "number" then - if v == math.floor(v) and v < 1e15 and v > -1e15 then - val = string.format("%d", v) - else - val = tostring(v) + if t == nil then return "null" end + if type(t) ~= "table" then + if type(t) == "string" then return '"' .. t .. '"' end + if type(t) == "boolean" then return t and "true" or "false" end + if type(t) == "number" then + if t == math.floor(t) and t < 1e15 and t > -1e15 then + return string.format("%d", t) + end + return tostring(t) + end + return tostring(t) + end + local n = #t + local isArray = n > 0 + if isArray then + for k in pairs(t) do + if type(k) ~= "number" or k ~= math.floor(k) or k < 1 or k > n then + isArray = false + break end - elseif type(v) == "string" then - val = '"' .. v .. '"' - else - val = tostring(v) end - parts[#parts + 1] = key .. ":" .. val + end + if isArray then + local parts = {} + for i = 1, n do + parts[#parts + 1] = cjson.encode(t[i]) + end + return "[" .. table.concat(parts, ",") .. "]" + end + local parts = {} + for k, v in pairs(t) do + parts[#parts + 1] = '"' .. tostring(k) .. '":' .. cjson.encode(v) end return "{" .. table.concat(parts, ",") .. 
"}" end @@ -115,7 +134,8 @@ function setStringArrayGlobal( } function runScript(L: LuaState, script: string): string { - const status = lauxlib.luaL_dostring(L, toLuastring(script)); + const wrapped = `local __r = (function()\n${script}\nend)()\nreturn cjson.encode(__r)`; + const status = lauxlib.luaL_dostring(L, toLuastring(wrapped)); if (status !== lua.LUA_OK) { const errMsg = toJsstring(lua.lua_tostring(L, -1)); throw new Error(`Lua error: ${errMsg}`); diff --git a/lambdas/https-client-lambda/src/__tests__/record-result-lua.test.ts b/lambdas/https-client-lambda/src/__tests__/record-result-lua.test.ts index 2c809559..515f1377 100644 --- a/lambdas/https-client-lambda/src/__tests__/record-result-lua.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/record-result-lua.test.ts @@ -1,31 +1,37 @@ import recordResultLuaSrc from "services/record-result.lua"; import { createRedisStore, evalLua } from "__tests__/helpers/lua-redis-mock"; +// ARGV: [now, success, cooldownMs, decayPeriodMs, cbErrorThreshold, cbMinAttempts, cbWindowPeriodMs] +// KEYS: [cbKey] +// Returns: [ok (0|1), state] state: "closed" | "opened" | "failed" + type RecordResultArgs = { now: number; success: boolean; - cbWindowPeriodMs: number; + cooldownMs: number; + decayPeriodMs: number; cbErrorThreshold: number; cbMinAttempts: number; - cbCooldownMs: number; - decayPeriodMs: number; + cbWindowPeriodMs: number; }; const defaultArgs: RecordResultArgs = { now: 1_000_000, success: true, - cbWindowPeriodMs: 60_000, + cooldownMs: 60_000, + decayPeriodMs: 300_000, cbErrorThreshold: 0.5, cbMinAttempts: 10, - cbCooldownMs: 60_000, - decayPeriodMs: 300_000, + cbWindowPeriodMs: 60_000, }; +type RecordResultResult = [number, string]; + function runRecordResult( store: ReturnType, args: Partial = {}, targetId = "t1", -) { +): RecordResultResult { const merged = { ...defaultArgs, ...args }; return evalLua( recordResultLuaSrc, @@ -33,22 +39,24 @@ function runRecordResult( [ merged.now.toString(), merged.success ? 
"1" : "0", - merged.cbWindowPeriodMs.toString(), + merged.cooldownMs.toString(), + merged.decayPeriodMs.toString(), merged.cbErrorThreshold.toString(), merged.cbMinAttempts.toString(), - merged.cbCooldownMs.toString(), - merged.decayPeriodMs.toString(), + merged.cbWindowPeriodMs.toString(), ], store, - ); + ) as RecordResultResult; } describe("record-result.lua", () => { describe("success recording", () => { it("returns closed state for a successful result", () => { const store = createRedisStore(); - const result = runRecordResult(store, { success: true }); - expect(result).toEqual({ ok: true, state: "closed" }); + const [ok, state] = runRecordResult(store, { success: true }); + + expect(ok).toBe(1); + expect(state).toBe("closed"); }); it("increments attempt count without incrementing failures", () => { @@ -71,6 +79,14 @@ describe("record-result.lua", () => { expect(cbHash.get("cb_failures")).toBe("1"); }); + it("returns failed state for a single failure below threshold", () => { + const store = createRedisStore(); + const [ok, state] = runRecordResult(store, { success: false }); + + expect(ok).toBe(0); + expect(state).toBe("failed"); + }); + it("stays closed when below error threshold", () => { const store = createRedisStore(); const now = 1_000_000; @@ -82,8 +98,9 @@ describe("record-result.lua", () => { runRecordResult(store, { now, success: false }); } - const result = runRecordResult(store, { now, success: true }); - expect(result).toEqual({ ok: true, state: "closed" }); + const [ok, state] = runRecordResult(store, { now, success: true }); + expect(ok).toBe(1); + expect(state).toBe("closed"); }); }); @@ -92,22 +109,24 @@ describe("record-result.lua", () => { const store = createRedisStore(); const now = 1_000_000; - for (let i = 0; i < 10; i++) { - runRecordResult(store, { + for (let i = 0; i < 4; i++) { + const [, state] = runRecordResult(store, { now, success: false, cbMinAttempts: 5, cbErrorThreshold: 0.5, }); + expect(state).toBe("failed"); } - const 
result = runRecordResult(store, { + const [ok, state] = runRecordResult(store, { now, success: false, cbMinAttempts: 5, cbErrorThreshold: 0.5, }); - expect(result).toEqual({ ok: false, state: "opened" }); + expect(ok).toBe(0); + expect(state).toBe("opened"); }); it("does not open circuit when below minimum attempts", () => { @@ -122,146 +141,173 @@ describe("record-result.lua", () => { }); } - const result = runRecordResult(store, { + const [ok, state] = runRecordResult(store, { now, success: false, cbMinAttempts: 10, }); - expect(result).toEqual({ ok: true, state: "closed" }); + expect(ok).toBe(0); + expect(state).toBe("failed"); }); it("sets opened_until_ms with cooldown on open", () => { const store = createRedisStore(); const now = 1_000_000; - const cbCooldownMs = 30_000; + const cooldownMs = 30_000; - for (let i = 0; i < 10; i++) { + for (let i = 0; i < 5; i++) { runRecordResult(store, { now, success: false, cbMinAttempts: 5, cbErrorThreshold: 0.5, - cbCooldownMs, + cooldownMs, }); } const cbHash = store.get("cb:t1")!; - expect(Number(cbHash.get("opened_until_ms"))).toBe(now + cbCooldownMs); + expect(Number(cbHash.get("opened_until_ms"))).toBe(now + cooldownMs); }); - }); - describe("window rolling", () => { - it("rolls window when period expires", () => { + it("resets all counters on open", () => { const store = createRedisStore(); - const windowPeriodMs = 60_000; - const t0 = 1_000_000; - const t1 = t0 + windowPeriodMs + 1; + const now = 1_000_000; - for (let i = 0; i < 3; i++) { + for (let i = 0; i < 5; i++) { runRecordResult(store, { - now: t0, + now, success: false, - cbWindowPeriodMs: windowPeriodMs, + cbMinAttempts: 5, + cbErrorThreshold: 0.5, }); } - runRecordResult(store, { - now: t1, - success: true, - cbWindowPeriodMs: windowPeriodMs, - }); - const cbHash = store.get("cb:t1")!; - expect(cbHash.get("cb_prev_failures")).toBe("3"); - expect(cbHash.get("cb_prev_attempts")).toBe("3"); - expect(cbHash.get("cb_attempts")).toBe("1"); 
expect(cbHash.get("cb_failures")).toBe("0"); + expect(cbHash.get("cb_attempts")).toBe("0"); + expect(cbHash.get("cb_window_from")).toBe("0"); + expect(cbHash.get("cb_prev_failures")).toBe("0"); + expect(cbHash.get("cb_prev_attempts")).toBe("0"); }); - it("initialises window_from on first call", () => { + it("does not double-trip when circuit is already open", () => { const store = createRedisStore(); const now = 1_000_000; + const openedUntil = now + 60_000; + + store.set( + "cb:t1", + new Map([ + ["opened_until_ms", openedUntil.toString()], + ["cb_window_from", now.toString()], + ]), + ); - runRecordResult(store, { now }); + for (let i = 0; i < 20; i++) { + const [, state] = runRecordResult(store, { + now, + success: false, + cbMinAttempts: 5, + cbErrorThreshold: 0.5, + }); + expect(state).toBe("failed"); + } const cbHash = store.get("cb:t1")!; - expect(cbHash.get("cb_window_from")).toBe(now.toString()); + expect(Number(cbHash.get("opened_until_ms"))).toBe(openedUntil); }); }); describe("two-window blended rate", () => { it("blends previous window failures into current assessment", () => { const store = createRedisStore(); - const windowPeriodMs = 60_000; - const t0 = 1_000_000; + const now = 1_000_000; + const cbWindowPeriodMs = 60_000; - for (let i = 0; i < 10; i++) { - runRecordResult(store, { - now: t0, - success: false, - cbWindowPeriodMs: windowPeriodMs, - cbMinAttempts: 5, - cbErrorThreshold: 0.5, - }); - } + store.set( + "cb:t1", + new Map([ + ["cb_window_from", now.toString()], + ["cb_prev_failures", "8"], + ["cb_prev_attempts", "10"], + ]), + ); - const justAfterRoll = t0 + windowPeriodMs + 1; - const result = runRecordResult(store, { - now: justAfterRoll, + const [ok, state] = runRecordResult(store, { + now, success: false, - cbWindowPeriodMs: windowPeriodMs, + cbWindowPeriodMs, cbMinAttempts: 5, cbErrorThreshold: 0.5, }); - - expect(result).toEqual({ ok: false, state: "opened" }); + expect(ok).toBe(0); + expect(state).toBe("opened"); }); - it("reduces 
previous window weight as current window progresses", () => { + it("reduces previous window weight as current window ages", () => { const store = createRedisStore(); - const windowPeriodMs = 100_000; + const cbWindowPeriodMs = 100_000; const t0 = 1_000_000; + const nearEnd = t0 + cbWindowPeriodMs - 1; - for (let i = 0; i < 10; i++) { - runRecordResult(store, { - now: t0, - success: false, - cbWindowPeriodMs: windowPeriodMs, - cbMinAttempts: 5, - cbErrorThreshold: 0.5, - }); - } + store.set( + "cb:t1", + new Map([ + ["cb_window_from", t0.toString()], + ["cb_prev_failures", "10"], + ["cb_prev_attempts", "10"], + ]), + ); - const nearEndOfWindow = t0 + windowPeriodMs + windowPeriodMs - 1; for (let i = 0; i < 20; i++) { runRecordResult(store, { - now: nearEndOfWindow, + now: nearEnd, success: true, - cbWindowPeriodMs: windowPeriodMs, + cbWindowPeriodMs, cbMinAttempts: 5, cbErrorThreshold: 0.5, }); } - const result = runRecordResult(store, { - now: nearEndOfWindow, - success: true, - cbWindowPeriodMs: windowPeriodMs, + const [, state] = runRecordResult(store, { + now: nearEnd, + success: false, + cbWindowPeriodMs, + cbMinAttempts: 5, + cbErrorThreshold: 0.5, + }); + expect(state).toBe("failed"); + }); + + it("ignores previous window when cbWindowPeriodMs is 0", () => { + const store = createRedisStore(); + const now = 1_000_000; + + store.set( + "cb:t1", + new Map([ + ["cb_window_from", now.toString()], + ["cb_prev_failures", "100"], + ["cb_prev_attempts", "100"], + ]), + ); + + const [, state] = runRecordResult(store, { + now, + success: false, + cbWindowPeriodMs: 0, cbMinAttempts: 5, cbErrorThreshold: 0.5, }); - expect(result).toEqual({ ok: true, state: "closed" }); + expect(state).toBe("failed"); }); }); describe("decay period", () => { - it("clears opened_until_ms after decay period elapses", () => { + it("preserves opened_until_ms during active decay", () => { const store = createRedisStore(); - const openedAt = 1_000_000; - const cooldownMs = 60_000; - const 
decayPeriodMs = 300_000; - const openedUntil = openedAt + cooldownMs; - const afterDecay = openedUntil + decayPeriodMs + 1; + const openedUntil = 1_060_000; + const duringDecay = openedUntil + 100_000; store.set( "cb:t1", @@ -269,19 +315,20 @@ describe("record-result.lua", () => { ); runRecordResult(store, { - now: afterDecay, + now: duringDecay, success: true, - decayPeriodMs, + decayPeriodMs: 300_000, }); const cbHash = store.get("cb:t1")!; - expect(cbHash.get("opened_until_ms")).toBe("0"); + expect(Number(cbHash.get("opened_until_ms"))).toBe(openedUntil); }); - it("preserves opened_until_ms during active decay", () => { + it("clears opened_until_ms after decay period elapses", () => { const store = createRedisStore(); const openedUntil = 1_060_000; - const duringDecay = openedUntil + 100_000; + const decayPeriodMs = 300_000; + const afterDecay = openedUntil + decayPeriodMs + 1; store.set( "cb:t1", @@ -289,13 +336,23 @@ describe("record-result.lua", () => { ); runRecordResult(store, { - now: duringDecay, + now: afterDecay, success: true, - decayPeriodMs: 300_000, + decayPeriodMs, }); const cbHash = store.get("cb:t1")!; - expect(Number(cbHash.get("opened_until_ms"))).toBe(openedUntil); + expect(cbHash.get("opened_until_ms")).toBe("0"); + }); + + it("clears opened_until_ms when circuit was never opened", () => { + const store = createRedisStore(); + const now = 1_000_000; + + runRecordResult(store, { now, success: true }); + + const cbHash = store.get("cb:t1")!; + expect(cbHash.get("opened_until_ms")).toBe("0"); }); }); From c55139d9f93256151bdb19e98ff09fc777ddb65d Mon Sep 17 00:00:00 2001 From: Rhys Cox Date: Mon, 20 Apr 2026 13:26:10 +0100 Subject: [PATCH 18/65] CCM-16073 - PR feedback --- .../client-delivery/iam_role_sqs_target.tf | 17 +++++++++++++ .../fixtures/subscriptions/mock-client-1.json | 12 ++++++---- .../fixtures/subscriptions/mock-client-2.json | 24 +++++++++++-------- .../mock-client-circuit-breaker.json | 12 +++++----- 
.../subscriptions/mock-client-mtls.json | 14 ++++++----- .../subscriptions/mock-client-rate-limit.json | 12 ++++++---- 6 files changed, 59 insertions(+), 32 deletions(-) diff --git a/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf b/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf index 8678daf3..55162684 100644 --- a/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf +++ b/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf @@ -131,4 +131,21 @@ data "aws_iam_policy_document" "https_client_lambda" { ] } } + + dynamic "statement" { + for_each = var.elasticache_endpoint != "" ? [1] : [] + content { + sid = "ElastiCacheConnect" + effect = "Allow" + + actions = [ + "elasticache:Connect", + ] + + resources = [ + "arn:aws:elasticache:${var.region}:${var.aws_account_id}:serverlesscache:${var.elasticache_cache_name}", + "arn:aws:elasticache:${var.region}:${var.aws_account_id}:user:${var.elasticache_iam_username}", + ] + } + } } diff --git a/tests/integration/fixtures/subscriptions/mock-client-1.json b/tests/integration/fixtures/subscriptions/mock-client-1.json index ed70c73f..2542e92b 100644 --- a/tests/integration/fixtures/subscriptions/mock-client-1.json +++ b/tests/integration/fixtures/subscriptions/mock-client-1.json @@ -35,15 +35,17 @@ "headerName": "x-api-key", "headerValue": "REPLACED_BY_TERRAFORM" }, - "certPinning": { - "enabled": false + "delivery": { + "mtls": { + "enabled": false, + "certPinning": { + "enabled": false + } + } }, "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", "invocationMethod": "POST", "invocationRateLimit": 10, - "mtls": { - "enabled": false - }, "targetId": "target-23b2ee2f-8e81-43cd-9bb8-5ea30a09f779", "type": "API" } diff --git a/tests/integration/fixtures/subscriptions/mock-client-2.json b/tests/integration/fixtures/subscriptions/mock-client-2.json index ab5460c9..711b8f9f 100644 --- a/tests/integration/fixtures/subscriptions/mock-client-2.json 
+++ b/tests/integration/fixtures/subscriptions/mock-client-2.json @@ -20,15 +20,17 @@ "headerName": "x-api-key", "headerValue": "REPLACED_BY_TERRAFORM" }, - "certPinning": { - "enabled": false + "delivery": { + "mtls": { + "enabled": false, + "certPinning": { + "enabled": false + } + } }, "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", "invocationMethod": "POST", "invocationRateLimit": 10, - "mtls": { - "enabled": false - }, "targetId": "target-1f3aa57d-c0b6-4a0a-a8e9-c7f97f1e27e7", "type": "API" }, @@ -37,15 +39,17 @@ "headerName": "x-api-key", "headerValue": "REPLACED_BY_TERRAFORM" }, - "certPinning": { - "enabled": false + "delivery": { + "mtls": { + "enabled": false, + "certPinning": { + "enabled": false + } + } }, "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", "invocationMethod": "POST", "invocationRateLimit": 10, - "mtls": { - "enabled": false - }, "targetId": "target-c23f4ad8-2b6f-4510-b5b6-40f2b7fbbec5", "type": "API" } diff --git a/tests/integration/fixtures/subscriptions/mock-client-circuit-breaker.json b/tests/integration/fixtures/subscriptions/mock-client-circuit-breaker.json index 783681c5..f46e2dd7 100644 --- a/tests/integration/fixtures/subscriptions/mock-client-circuit-breaker.json +++ b/tests/integration/fixtures/subscriptions/mock-client-circuit-breaker.json @@ -19,20 +19,20 @@ "headerName": "x-api-key", "headerValue": "REPLACED_BY_TERRAFORM" }, - "certPinning": { - "enabled": false - }, "delivery": { "circuitBreaker": { "enabled": true + }, + "mtls": { + "enabled": false, + "certPinning": { + "enabled": false + } } }, "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", "invocationMethod": "POST", "invocationRateLimit": 10, - "mtls": { - "enabled": false - }, "targetId": "target-cb-001", "type": "API" } diff --git a/tests/integration/fixtures/subscriptions/mock-client-mtls.json b/tests/integration/fixtures/subscriptions/mock-client-mtls.json index d225612f..d9879679 100644 --- 
a/tests/integration/fixtures/subscriptions/mock-client-mtls.json +++ b/tests/integration/fixtures/subscriptions/mock-client-mtls.json @@ -19,16 +19,18 @@ "headerName": "x-api-key", "headerValue": "REPLACED_BY_TERRAFORM" }, - "certPinning": { - "enabled": true, - "spkiHash": "REPLACED_BY_TERRAFORM" + "delivery": { + "mtls": { + "enabled": true, + "certPinning": { + "enabled": true, + "spkiHash": "REPLACED_BY_TERRAFORM" + } + } }, "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", "invocationMethod": "POST", "invocationRateLimit": 10, - "mtls": { - "enabled": true - }, "targetId": "target-mtls-001", "type": "API" } diff --git a/tests/integration/fixtures/subscriptions/mock-client-rate-limit.json b/tests/integration/fixtures/subscriptions/mock-client-rate-limit.json index 80a40e6a..35271dce 100644 --- a/tests/integration/fixtures/subscriptions/mock-client-rate-limit.json +++ b/tests/integration/fixtures/subscriptions/mock-client-rate-limit.json @@ -19,15 +19,17 @@ "headerName": "x-api-key", "headerValue": "REPLACED_BY_TERRAFORM" }, - "certPinning": { - "enabled": false + "delivery": { + "mtls": { + "enabled": false, + "certPinning": { + "enabled": false + } + } }, "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", "invocationMethod": "POST", "invocationRateLimit": 2, - "mtls": { - "enabled": false - }, "targetId": "target-rl-001", "type": "API" } From 3cf99aa99e53195453075b6f7de8801291ffad72 Mon Sep 17 00:00:00 2001 From: Rhys Cox Date: Mon, 20 Apr 2026 13:35:09 +0100 Subject: [PATCH 19/65] CCM-16073 - PR feedback --- pnpm-lock.yaml | 353 ++++++++++---------------------------------- pnpm-workspace.yaml | 2 +- 2 files changed, 79 insertions(+), 276 deletions(-) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 2699d50c..10503d34 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -152,8 +152,8 @@ catalogs: specifier: ^8.10.161 version: 8.10.161 '@types/node': - specifier: ^24.12.0 - version: 24.12.0 + specifier: ^25.5.0 + version: 25.6.0 '@types/node-forge': 
specifier: ^1.3.11 version: 1.3.14 @@ -225,7 +225,7 @@ importers: version: 4.16.2(@typescript-eslint/utils@8.58.0(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3))(eslint-import-resolver-node@0.3.9)(eslint@9.39.4(jiti@2.6.1)) eslint-plugin-jest: specifier: catalog:lint - version: 29.15.2(@typescript-eslint/eslint-plugin@8.58.0(@typescript-eslint/parser@8.58.0(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.4(jiti@2.6.1))(jest@30.3.0(@types/node@25.5.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)))(typescript@5.9.3) + version: 29.15.2(@typescript-eslint/eslint-plugin@8.58.0(@typescript-eslint/parser@8.58.0(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.4(jiti@2.6.1))(jest@30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)))(typescript@5.9.3) eslint-plugin-json: specifier: catalog:lint version: 4.0.1 @@ -252,10 +252,10 @@ importers: version: 63.0.0(eslint@9.39.4(jiti@2.6.1)) jest: specifier: catalog:test - version: 30.3.0(@types/node@25.5.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)) + version: 30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)) jest-html-reporter: specifier: catalog:test - version: 4.4.0(jest@30.3.0(@types/node@25.5.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3))) + version: 4.4.0(jest@30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3))) knip: specifier: catalog:tools version: 6.4.1(@emnapi/core@1.9.1)(@emnapi/runtime@1.9.1) @@ -264,10 +264,10 @@ importers: version: 5.0.1 ts-jest: specifier: catalog:test - version: 29.4.9(@babel/core@7.29.0)(@jest/transform@30.3.0)(@jest/types@30.3.0)(babel-jest@30.3.0(@babel/core@7.29.0))(esbuild@0.28.0)(jest-util@30.3.0)(jest@30.3.0(@types/node@25.5.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)))(typescript@5.9.3) + version: 
29.4.9(@babel/core@7.29.0)(@jest/transform@30.3.0)(@jest/types@30.3.0)(babel-jest@30.3.0(@babel/core@7.29.0))(esbuild@0.28.0)(jest-util@30.3.0)(jest@30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)))(typescript@5.9.3) ts-node: specifier: catalog:tools - version: 10.9.2(@types/node@25.5.0)(typescript@5.9.3) + version: 10.9.2(@types/node@25.6.0)(typescript@5.9.3) tsx: specifier: catalog:tools version: 4.21.0 @@ -321,13 +321,13 @@ importers: version: 30.0.0 '@types/node': specifier: catalog:tools - version: 24.12.0 + version: 25.6.0 eslint: specifier: catalog:lint version: 9.39.4(jiti@2.6.1) jest: specifier: catalog:test - version: 30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3)) + version: 30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)) typescript: specifier: catalog:tools version: 5.9.3 @@ -391,7 +391,7 @@ importers: version: 30.0.0 '@types/node': specifier: catalog:tools - version: 24.12.0 + version: 25.6.0 '@types/node-forge': specifier: catalog:tools version: 1.3.14 @@ -403,7 +403,7 @@ importers: version: 0.1.5 jest: specifier: catalog:test - version: 30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)) + version: 30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)) typescript: specifier: catalog:tools version: 5.9.3 @@ -431,19 +431,19 @@ importers: version: 30.0.0 '@types/node': specifier: catalog:tools - version: 24.12.0 + version: 25.6.0 eslint: specifier: catalog:lint version: 9.39.4(jiti@2.6.1) jest: specifier: catalog:test - version: 30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3)) + version: 30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)) jest-html-reporter: specifier: catalog:test - version: 4.4.0(jest@30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3))) + version: 
4.4.0(jest@30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3))) ts-jest: specifier: catalog:test - version: 29.4.9(@babel/core@7.29.0)(@jest/transform@30.3.0)(@jest/types@30.3.0)(babel-jest@30.3.0(@babel/core@7.29.0))(esbuild@0.28.0)(jest-util@30.3.0)(jest@30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3)))(typescript@5.9.3) + version: 29.4.9(@babel/core@7.29.0)(@jest/transform@30.3.0)(@jest/types@30.3.0)(babel-jest@30.3.0(@babel/core@7.29.0))(esbuild@0.28.0)(jest-util@30.3.0)(jest@30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)))(typescript@5.9.3) typescript: specifier: catalog:tools version: 5.9.3 @@ -462,16 +462,16 @@ importers: version: 30.0.0 '@types/node': specifier: catalog:tools - version: 24.12.0 + version: 25.6.0 eslint: specifier: catalog:lint version: 9.39.4(jiti@2.6.1) jest: specifier: catalog:test - version: 30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)) + version: 30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)) ts-jest: specifier: catalog:test - version: 29.4.9(@babel/core@7.29.0)(@jest/transform@30.3.0)(@jest/types@30.3.0)(babel-jest@30.3.0(@babel/core@7.29.0))(esbuild@0.28.0)(jest-util@30.3.0)(jest@30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)))(typescript@5.9.3) + version: 29.4.9(@babel/core@7.29.0)(@jest/transform@30.3.0)(@jest/types@30.3.0)(babel-jest@30.3.0(@babel/core@7.29.0))(esbuild@0.28.0)(jest-util@30.3.0)(jest@30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)))(typescript@5.9.3) typescript: specifier: catalog:tools version: 5.9.3 @@ -496,16 +496,16 @@ importers: version: 30.0.0 '@types/node': specifier: catalog:tools - version: 24.12.0 + version: 25.6.0 eslint: specifier: catalog:lint version: 9.39.4(jiti@2.6.1) jest: specifier: catalog:test - version: 
30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)) + version: 30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)) ts-jest: specifier: catalog:test - version: 29.4.9(@babel/core@7.29.0)(@jest/transform@30.3.0)(@jest/types@30.3.0)(babel-jest@30.3.0(@babel/core@7.29.0))(esbuild@0.28.0)(jest-util@30.3.0)(jest@30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)))(typescript@5.9.3) + version: 29.4.9(@babel/core@7.29.0)(@jest/transform@30.3.0)(@jest/types@30.3.0)(babel-jest@30.3.0(@babel/core@7.29.0))(esbuild@0.28.0)(jest-util@30.3.0)(jest@30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)))(typescript@5.9.3) typescript: specifier: catalog:tools version: 5.9.3 @@ -524,16 +524,16 @@ importers: version: 30.0.0 '@types/node': specifier: catalog:tools - version: 24.12.0 + version: 25.6.0 eslint: specifier: catalog:lint version: 9.39.4(jiti@2.6.1) jest: specifier: catalog:test - version: 30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3)) + version: 30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)) ts-jest: specifier: catalog:test - version: 29.4.9(@babel/core@7.29.0)(@jest/transform@30.3.0)(@jest/types@30.3.0)(babel-jest@30.3.0(@babel/core@7.29.0))(esbuild@0.28.0)(jest-util@30.3.0)(jest@30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3)))(typescript@5.9.3) + version: 29.4.9(@babel/core@7.29.0)(@jest/transform@30.3.0)(@jest/types@30.3.0)(babel-jest@30.3.0(@babel/core@7.29.0))(esbuild@0.28.0)(jest-util@30.3.0)(jest@30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)))(typescript@5.9.3) typescript: specifier: catalog:tools version: 5.9.3 @@ -552,16 +552,16 @@ importers: version: 30.0.0 '@types/node': specifier: catalog:tools - version: 24.12.0 + version: 25.6.0 eslint: specifier: catalog:lint version: 9.39.4(jiti@2.6.1) jest: specifier: 
catalog:test - version: 30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3)) + version: 30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)) ts-jest: specifier: catalog:test - version: 29.4.9(@babel/core@7.29.0)(@jest/transform@30.3.0)(@jest/types@30.3.0)(babel-jest@30.3.0(@babel/core@7.29.0))(esbuild@0.28.0)(jest-util@30.3.0)(jest@30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3)))(typescript@5.9.3) + version: 29.4.9(@babel/core@7.29.0)(@jest/transform@30.3.0)(@jest/types@30.3.0)(babel-jest@30.3.0(@babel/core@7.29.0))(esbuild@0.28.0)(jest-util@30.3.0)(jest@30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)))(typescript@5.9.3) typescript: specifier: catalog:tools version: 5.9.3 @@ -598,13 +598,13 @@ importers: version: 30.0.0 '@types/node': specifier: catalog:tools - version: 24.12.0 + version: 25.6.0 eslint: specifier: catalog:lint version: 9.39.4(jiti@2.6.1) jest: specifier: catalog:test - version: 30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3)) + version: 30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)) typescript: specifier: catalog:tools version: 5.9.3 @@ -635,13 +635,13 @@ importers: version: 30.0.0 '@types/node': specifier: catalog:tools - version: 24.12.0 + version: 25.6.0 eslint: specifier: catalog:lint version: 9.39.4(jiti@2.6.1) jest: specifier: catalog:test - version: 30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3)) + version: 30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)) typescript: specifier: catalog:tools version: 5.9.3 @@ -663,7 +663,7 @@ importers: version: 22.0.5 '@types/node': specifier: catalog:tools - version: 24.12.0 + version: 25.6.0 eslint: specifier: catalog:lint version: 9.39.4(jiti@2.6.1) @@ -706,7 +706,7 @@ importers: version: 30.0.0 '@types/node': specifier: catalog:tools - version: 
24.12.0 + version: 25.6.0 '@types/yargs': specifier: catalog:tools version: 17.0.35 @@ -715,10 +715,10 @@ importers: version: 9.39.4(jiti@2.6.1) jest: specifier: catalog:test - version: 30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3)) + version: 30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)) ts-jest: specifier: catalog:test - version: 29.4.9(@babel/core@7.29.0)(@jest/transform@30.3.0)(@jest/types@30.3.0)(babel-jest@30.3.0(@babel/core@7.29.0))(esbuild@0.28.0)(jest-util@30.3.0)(jest@30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3)))(typescript@5.9.3) + version: 29.4.9(@babel/core@7.29.0)(@jest/transform@30.3.0)(@jest/types@30.3.0)(babel-jest@30.3.0(@babel/core@7.29.0))(esbuild@0.28.0)(jest-util@30.3.0)(jest@30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)))(typescript@5.9.3) tsx: specifier: catalog:tools version: 4.21.0 @@ -2158,11 +2158,8 @@ packages: '@types/node-forge@1.3.14': resolution: {integrity: sha512-mhVF2BnD4BO+jtOp7z1CdzaK4mbuK0LLQYAvdOLqHTavxFNq4zA1EmYkpnFjP8HOUzedfQkRnp0E2ulSAYSzAw==} - '@types/node@24.12.0': - resolution: {integrity: sha512-GYDxsZi3ChgmckRT9HPU0WEhKLP08ev/Yfcq2AstjrDASOYCSXeyjDsHg4v5t4jOj7cyDX3vmprafKlWIG9MXQ==} - - '@types/node@25.5.0': - resolution: {integrity: sha512-jp2P3tQMSxWugkCUKLRPVUpGaL5MVFwF8RDuSRztfwgN1wmqJeMSbKlnEtQqU8UrhTmzEmZdu2I6v2dpp7XIxw==} + '@types/node@25.6.0': + resolution: {integrity: sha512-+qIYRKdNYJwY3vRCZMdJbPLJAtGjQBudzZzdzwQYkEPQd+PJGixUL5QfvCLDaULoLv+RhT3LDkwEfKaAkgSmNQ==} '@types/stack-utils@2.0.3': resolution: {integrity: sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw==} @@ -4509,11 +4506,8 @@ packages: resolution: {integrity: sha512-nWJ91DjeOkej/TA8pXQ3myruKpKEYgqvpw9lz4OPHj/NWFNluYrjbz9j01CJ8yKQd2g4jFoOkINCTW2I5LEEyw==} engines: {node: '>= 0.4'} - undici-types@7.16.0: - resolution: {integrity: 
sha512-Zz+aZWSj8LE6zoxD+xrjh4VfkIG8Ya6LvYkZqtUQGJPZjYl53ypCaUwWqo7eI0x66KBGeRo+mlBEkMSeSZ38Nw==} - - undici-types@7.18.2: - resolution: {integrity: sha512-AsuCzffGHJybSaRrmr5eHr81mwJU3kjw6M+uprWvCXiNeN9SOGwQ3Jn8jb8m3Z6izVgknn1R0FTCEAP2QrLY/w==} + undici-types@7.19.2: + resolution: {integrity: sha512-qYVnV5OEm2AW8cJMCpdV20CDyaN3g0AjDlOGf1OW4iaDEx8MwdtChUp4zu4H0VP3nDRF/8RKWH+IPp9uW0YGZg==} unrs-resolver@1.11.1: resolution: {integrity: sha512-bSjt9pjaEBnNiGgc9rUiHGKv5l4/TGzDmYw3RhnkJGtLhbnnA/5qJj7x3dNDCRx/PJxu774LlH8lCOlB4hEfKg==} @@ -5910,13 +5904,13 @@ snapshots: '@jest/console@30.3.0': dependencies: '@jest/types': 30.3.0 - '@types/node': 25.5.0 + '@types/node': 25.6.0 chalk: 4.1.2 jest-message-util: 30.3.0 jest-util: 30.3.0 slash: 3.0.0 - '@jest/core@30.3.0(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3))': + '@jest/core@30.3.0(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3))': dependencies: '@jest/console': 30.3.0 '@jest/pattern': 30.0.1 @@ -5924,49 +5918,14 @@ snapshots: '@jest/test-result': 30.3.0 '@jest/transform': 30.3.0 '@jest/types': 30.3.0 - '@types/node': 25.5.0 + '@types/node': 25.6.0 ansi-escapes: 4.3.2 chalk: 4.1.2 ci-info: 4.4.0 exit-x: 0.2.2 graceful-fs: 4.2.11 jest-changed-files: 30.3.0 - jest-config: 30.3.0(@types/node@25.5.0)(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3)) - jest-haste-map: 30.3.0 - jest-message-util: 30.3.0 - jest-regex-util: 30.0.1 - jest-resolve: 30.3.0 - jest-resolve-dependencies: 30.3.0 - jest-runner: 30.3.0 - jest-runtime: 30.3.0 - jest-snapshot: 30.3.0 - jest-util: 30.3.0 - jest-validate: 30.3.0 - jest-watcher: 30.3.0 - pretty-format: 30.3.0 - slash: 3.0.0 - transitivePeerDependencies: - - babel-plugin-macros - - esbuild-register - - supports-color - - ts-node - - '@jest/core@30.3.0(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3))': - dependencies: - '@jest/console': 30.3.0 - '@jest/pattern': 30.0.1 - '@jest/reporters': 30.3.0 - '@jest/test-result': 30.3.0 - '@jest/transform': 30.3.0 - '@jest/types': 
30.3.0 - '@types/node': 25.5.0 - ansi-escapes: 4.3.2 - chalk: 4.1.2 - ci-info: 4.4.0 - exit-x: 0.2.2 - graceful-fs: 4.2.11 - jest-changed-files: 30.3.0 - jest-config: 30.3.0(@types/node@25.5.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)) + jest-config: 30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)) jest-haste-map: 30.3.0 jest-message-util: 30.3.0 jest-regex-util: 30.0.1 @@ -5992,7 +5951,7 @@ snapshots: dependencies: '@jest/fake-timers': 30.3.0 '@jest/types': 30.3.0 - '@types/node': 25.5.0 + '@types/node': 25.6.0 jest-mock: 30.3.0 '@jest/expect-utils@30.3.0': @@ -6010,7 +5969,7 @@ snapshots: dependencies: '@jest/types': 30.3.0 '@sinonjs/fake-timers': 15.3.2 - '@types/node': 25.5.0 + '@types/node': 25.6.0 jest-message-util: 30.3.0 jest-mock: 30.3.0 jest-util: 30.3.0 @@ -6028,7 +5987,7 @@ snapshots: '@jest/pattern@30.0.1': dependencies: - '@types/node': 25.5.0 + '@types/node': 25.6.0 jest-regex-util: 30.0.1 '@jest/reporters@30.3.0': @@ -6039,7 +5998,7 @@ snapshots: '@jest/transform': 30.3.0 '@jest/types': 30.3.0 '@jridgewell/trace-mapping': 0.3.31 - '@types/node': 25.5.0 + '@types/node': 25.6.0 chalk: 4.1.2 collect-v8-coverage: 1.0.3 exit-x: 0.2.2 @@ -6115,7 +6074,7 @@ snapshots: '@jest/schemas': 30.0.5 '@types/istanbul-lib-coverage': 2.0.6 '@types/istanbul-reports': 3.0.4 - '@types/node': 25.5.0 + '@types/node': 25.6.0 '@types/yargs': 17.0.35 chalk: 4.1.2 @@ -6749,15 +6708,11 @@ snapshots: '@types/node-forge@1.3.14': dependencies: - '@types/node': 25.5.0 - - '@types/node@24.12.0': - dependencies: - undici-types: 7.16.0 + '@types/node': 25.6.0 - '@types/node@25.5.0': + '@types/node@25.6.0': dependencies: - undici-types: 7.18.2 + undici-types: 7.19.2 '@types/stack-utils@2.0.3': {} @@ -7676,13 +7631,13 @@ snapshots: - eslint-import-resolver-webpack - supports-color - 
eslint-plugin-jest@29.15.2(@typescript-eslint/eslint-plugin@8.58.0(@typescript-eslint/parser@8.58.0(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.4(jiti@2.6.1))(jest@30.3.0(@types/node@25.5.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)))(typescript@5.9.3): + eslint-plugin-jest@29.15.2(@typescript-eslint/eslint-plugin@8.58.0(@typescript-eslint/parser@8.58.0(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.4(jiti@2.6.1))(jest@30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)))(typescript@5.9.3): dependencies: '@typescript-eslint/utils': 8.58.0(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3) eslint: 9.39.4(jiti@2.6.1) optionalDependencies: '@typescript-eslint/eslint-plugin': 8.58.0(@typescript-eslint/parser@8.58.0(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3))(eslint@9.39.4(jiti@2.6.1))(typescript@5.9.3) - jest: 30.3.0(@types/node@25.5.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)) + jest: 30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)) typescript: 5.9.3 transitivePeerDependencies: - supports-color @@ -8387,7 +8342,7 @@ snapshots: '@jest/expect': 30.3.0 '@jest/test-result': 30.3.0 '@jest/types': 30.3.0 - '@types/node': 25.5.0 + '@types/node': 25.6.0 chalk: 4.1.2 co: 4.6.0 dedent: 1.7.2 @@ -8407,15 +8362,15 @@ snapshots: - babel-plugin-macros - supports-color - jest-cli@30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3)): + jest-cli@30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)): dependencies: - '@jest/core': 30.3.0(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3)) + '@jest/core': 30.3.0(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)) '@jest/test-result': 30.3.0 '@jest/types': 30.3.0 chalk: 4.1.2 exit-x: 0.2.2 import-local: 3.2.0 - jest-config: 
30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3)) + jest-config: 30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)) jest-util: 30.3.0 jest-validate: 30.3.0 yargs: 17.7.2 @@ -8426,58 +8381,7 @@ snapshots: - supports-color - ts-node - jest-cli@30.3.0(@types/node@25.5.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)): - dependencies: - '@jest/core': 30.3.0(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)) - '@jest/test-result': 30.3.0 - '@jest/types': 30.3.0 - chalk: 4.1.2 - exit-x: 0.2.2 - import-local: 3.2.0 - jest-config: 30.3.0(@types/node@25.5.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)) - jest-util: 30.3.0 - jest-validate: 30.3.0 - yargs: 17.7.2 - transitivePeerDependencies: - - '@types/node' - - babel-plugin-macros - - esbuild-register - - supports-color - - ts-node - - jest-config@30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3)): - dependencies: - '@babel/core': 7.29.0 - '@jest/get-type': 30.1.0 - '@jest/pattern': 30.0.1 - '@jest/test-sequencer': 30.3.0 - '@jest/types': 30.3.0 - babel-jest: 30.3.0(@babel/core@7.29.0) - chalk: 4.1.2 - ci-info: 4.4.0 - deepmerge: 4.3.1 - glob: 10.5.0 - graceful-fs: 4.2.11 - jest-circus: 30.3.0 - jest-docblock: 30.2.0 - jest-environment-node: 30.3.0 - jest-regex-util: 30.0.1 - jest-resolve: 30.3.0 - jest-runner: 30.3.0 - jest-util: 30.3.0 - jest-validate: 30.3.0 - parse-json: 5.2.0 - pretty-format: 30.3.0 - slash: 3.0.0 - strip-json-comments: 3.1.1 - optionalDependencies: - '@types/node': 24.12.0 - ts-node: 10.9.2(@types/node@24.12.0)(typescript@5.9.3) - transitivePeerDependencies: - - babel-plugin-macros - - supports-color - - jest-config@30.3.0(@types/node@25.5.0)(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3)): + jest-config@30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)): dependencies: '@babel/core': 7.29.0 '@jest/get-type': 30.1.0 @@ -8503,40 +8407,8 @@ snapshots: 
slash: 3.0.0 strip-json-comments: 3.1.1 optionalDependencies: - '@types/node': 25.5.0 - ts-node: 10.9.2(@types/node@24.12.0)(typescript@5.9.3) - transitivePeerDependencies: - - babel-plugin-macros - - supports-color - - jest-config@30.3.0(@types/node@25.5.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)): - dependencies: - '@babel/core': 7.29.0 - '@jest/get-type': 30.1.0 - '@jest/pattern': 30.0.1 - '@jest/test-sequencer': 30.3.0 - '@jest/types': 30.3.0 - babel-jest: 30.3.0(@babel/core@7.29.0) - chalk: 4.1.2 - ci-info: 4.4.0 - deepmerge: 4.3.1 - glob: 10.5.0 - graceful-fs: 4.2.11 - jest-circus: 30.3.0 - jest-docblock: 30.2.0 - jest-environment-node: 30.3.0 - jest-regex-util: 30.0.1 - jest-resolve: 30.3.0 - jest-runner: 30.3.0 - jest-util: 30.3.0 - jest-validate: 30.3.0 - parse-json: 5.2.0 - pretty-format: 30.3.0 - slash: 3.0.0 - strip-json-comments: 3.1.1 - optionalDependencies: - '@types/node': 25.5.0 - ts-node: 10.9.2(@types/node@25.5.0)(typescript@5.9.3) + '@types/node': 25.6.0 + ts-node: 10.9.2(@types/node@25.6.0)(typescript@5.9.3) transitivePeerDependencies: - babel-plugin-macros - supports-color @@ -8565,7 +8437,7 @@ snapshots: '@jest/environment': 30.3.0 '@jest/fake-timers': 30.3.0 '@jest/types': 30.3.0 - '@types/node': 25.5.0 + '@types/node': 25.6.0 jest-mock: 30.3.0 jest-util: 30.3.0 jest-validate: 30.3.0 @@ -8573,7 +8445,7 @@ snapshots: jest-haste-map@30.3.0: dependencies: '@jest/types': 30.3.0 - '@types/node': 25.5.0 + '@types/node': 25.6.0 anymatch: 3.1.3 fb-watchman: 2.0.2 graceful-fs: 4.2.11 @@ -8585,27 +8457,13 @@ snapshots: optionalDependencies: fsevents: 2.3.3 - jest-html-reporter@4.4.0(jest@30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3))): + jest-html-reporter@4.4.0(jest@30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3))): dependencies: '@jest/reporters': 30.3.0 '@jest/test-result': 30.3.0 '@jest/types': 30.3.0 dateformat: 3.0.2 - jest: 
30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3)) - mkdirp: 1.0.4 - strip-ansi: 6.0.1 - xmlbuilder: 15.0.0 - transitivePeerDependencies: - - node-notifier - - supports-color - - jest-html-reporter@4.4.0(jest@30.3.0(@types/node@25.5.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3))): - dependencies: - '@jest/reporters': 30.3.0 - '@jest/test-result': 30.3.0 - '@jest/types': 30.3.0 - dateformat: 3.0.2 - jest: 30.3.0(@types/node@25.5.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)) + jest: 30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)) mkdirp: 1.0.4 strip-ansi: 6.0.1 xmlbuilder: 15.0.0 @@ -8640,7 +8498,7 @@ snapshots: jest-mock@30.3.0: dependencies: '@jest/types': 30.3.0 - '@types/node': 25.5.0 + '@types/node': 25.6.0 jest-util: 30.3.0 jest-pnp-resolver@1.2.3(jest-resolve@30.3.0): @@ -8674,7 +8532,7 @@ snapshots: '@jest/test-result': 30.3.0 '@jest/transform': 30.3.0 '@jest/types': 30.3.0 - '@types/node': 25.5.0 + '@types/node': 25.6.0 chalk: 4.1.2 emittery: 0.13.1 exit-x: 0.2.2 @@ -8703,7 +8561,7 @@ snapshots: '@jest/test-result': 30.3.0 '@jest/transform': 30.3.0 '@jest/types': 30.3.0 - '@types/node': 25.5.0 + '@types/node': 25.6.0 chalk: 4.1.2 cjs-module-lexer: 2.2.0 collect-v8-coverage: 1.0.3 @@ -8750,7 +8608,7 @@ snapshots: jest-util@30.3.0: dependencies: '@jest/types': 30.3.0 - '@types/node': 25.5.0 + '@types/node': 25.6.0 chalk: 4.1.2 ci-info: 4.4.0 graceful-fs: 4.2.11 @@ -8769,7 +8627,7 @@ snapshots: dependencies: '@jest/test-result': 30.3.0 '@jest/types': 30.3.0 - '@types/node': 25.5.0 + '@types/node': 25.6.0 ansi-escapes: 4.3.2 chalk: 4.1.2 emittery: 0.13.1 @@ -8778,31 +8636,18 @@ snapshots: jest-worker@30.3.0: dependencies: - '@types/node': 25.5.0 + '@types/node': 25.6.0 '@ungap/structured-clone': 1.3.0 jest-util: 30.3.0 merge-stream: 2.0.0 supports-color: 8.1.1 - jest@30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3)): - dependencies: - 
'@jest/core': 30.3.0(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3)) - '@jest/types': 30.3.0 - import-local: 3.2.0 - jest-cli: 30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3)) - transitivePeerDependencies: - - '@types/node' - - babel-plugin-macros - - esbuild-register - - supports-color - - ts-node - - jest@30.3.0(@types/node@25.5.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)): + jest@30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)): dependencies: - '@jest/core': 30.3.0(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)) + '@jest/core': 30.3.0(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)) '@jest/types': 30.3.0 import-local: 3.2.0 - jest-cli: 30.3.0(@types/node@25.5.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)) + jest-cli: 30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)) transitivePeerDependencies: - '@types/node' - babel-plugin-macros @@ -9596,33 +9441,12 @@ snapshots: picomatch: 4.0.4 typescript: 5.9.3 - ts-jest@29.4.9(@babel/core@7.29.0)(@jest/transform@30.3.0)(@jest/types@30.3.0)(babel-jest@30.3.0(@babel/core@7.29.0))(esbuild@0.28.0)(jest-util@30.3.0)(jest@30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3)))(typescript@5.9.3): - dependencies: - bs-logger: 0.2.6 - fast-json-stable-stringify: 2.1.0 - handlebars: 4.7.9 - jest: 30.3.0(@types/node@24.12.0)(ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3)) - json5: 2.2.3 - lodash.memoize: 4.1.2 - make-error: 1.3.6 - semver: 7.7.4 - type-fest: 4.41.0 - typescript: 5.9.3 - yargs-parser: 21.1.1 - optionalDependencies: - '@babel/core': 7.29.0 - '@jest/transform': 30.3.0 - '@jest/types': 30.3.0 - babel-jest: 30.3.0(@babel/core@7.29.0) - esbuild: 0.28.0 - jest-util: 30.3.0 - - 
ts-jest@29.4.9(@babel/core@7.29.0)(@jest/transform@30.3.0)(@jest/types@30.3.0)(babel-jest@30.3.0(@babel/core@7.29.0))(esbuild@0.28.0)(jest-util@30.3.0)(jest@30.3.0(@types/node@25.5.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)))(typescript@5.9.3): + ts-jest@29.4.9(@babel/core@7.29.0)(@jest/transform@30.3.0)(@jest/types@30.3.0)(babel-jest@30.3.0(@babel/core@7.29.0))(esbuild@0.28.0)(jest-util@30.3.0)(jest@30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)))(typescript@5.9.3): dependencies: bs-logger: 0.2.6 fast-json-stable-stringify: 2.1.0 handlebars: 4.7.9 - jest: 30.3.0(@types/node@25.5.0)(ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3)) + jest: 30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)) json5: 2.2.3 lodash.memoize: 4.1.2 make-error: 1.3.6 @@ -9638,33 +9462,14 @@ snapshots: esbuild: 0.28.0 jest-util: 30.3.0 - ts-node@10.9.2(@types/node@24.12.0)(typescript@5.9.3): + ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3): dependencies: '@cspotcode/source-map-support': 0.8.1 '@tsconfig/node10': 1.0.12 '@tsconfig/node12': 1.0.11 '@tsconfig/node14': 1.0.3 '@tsconfig/node16': 1.0.4 - '@types/node': 24.12.0 - acorn: 8.16.0 - acorn-walk: 8.3.5 - arg: 4.1.3 - create-require: 1.1.1 - diff: 4.0.4 - make-error: 1.3.6 - typescript: 5.9.3 - v8-compile-cache-lib: 3.0.1 - yn: 3.1.1 - optional: true - - ts-node@10.9.2(@types/node@25.5.0)(typescript@5.9.3): - dependencies: - '@cspotcode/source-map-support': 0.8.1 - '@tsconfig/node10': 1.0.12 - '@tsconfig/node12': 1.0.11 - '@tsconfig/node14': 1.0.3 - '@tsconfig/node16': 1.0.4 - '@types/node': 25.5.0 + '@types/node': 25.6.0 acorn: 8.16.0 acorn-walk: 8.3.5 arg: 4.1.3 @@ -9761,9 +9566,7 @@ snapshots: has-symbols: 1.1.0 which-boxed-primitive: 1.1.1 - undici-types@7.16.0: {} - - undici-types@7.18.2: {} + undici-types@7.19.2: {} unrs-resolver@1.11.1: dependencies: diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 52fb20f0..90671764 100644 --- 
a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -59,7 +59,7 @@ catalogs: tools: "@tsconfig/node22": "^22.0.5" "@types/aws-lambda": "^8.10.161" - "@types/node": "^24.12.0" + "@types/node": "^25.5.0" "@types/node-forge": "^1.3.11" "@types/yargs": "^17.0.24" esbuild: "^0.28.0" From 6262f2f26b2e4f8b8caff4eb84d61b86b55af5a9 Mon Sep 17 00:00:00 2001 From: Rhys Cox Date: Tue, 21 Apr 2026 08:11:37 +0100 Subject: [PATCH 20/65] CCM-16073 - PR feedback --- .../src/__tests__/endpoint-gate.test.ts | 118 ----------------- .../src/__tests__/handler.test.ts | 2 + .../src/__tests__/redis-client.test.ts | 123 ++++++++++++++++++ lambdas/https-client-lambda/src/handler.ts | 4 +- .../src/services/endpoint-gate.ts | 95 +------------- .../src/services/redis-client.ts | 96 ++++++++++++++ .../fixtures/subscriptions/mock-client-1.json | 4 +- .../fixtures/subscriptions/mock-client-2.json | 8 +- .../mock-client-circuit-breaker.json | 4 +- .../subscriptions/mock-client-mtls.json | 4 +- .../subscriptions/mock-client-rate-limit.json | 4 +- 11 files changed, 236 insertions(+), 226 deletions(-) create mode 100644 lambdas/https-client-lambda/src/__tests__/redis-client.test.ts create mode 100644 lambdas/https-client-lambda/src/services/redis-client.ts diff --git a/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts b/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts index 394a89b6..84984c71 100644 --- a/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts @@ -1,43 +1,15 @@ import { type EndpointGateConfig, admit, - getRedisClient, recordResult, resetAdmitSha, - resetRedisClient, } from "services/endpoint-gate"; -jest.mock("@nhs-notify-client-callbacks/logger"); - -const mockPresign = jest.fn().mockResolvedValue({ - hostname: "cache.example.invalid", - path: "/", - query: { "X-Amz-Signature": "mock-sig" }, -}); - -jest.mock("@smithy/signature-v4", () => ({ - SignatureV4: 
jest.fn().mockImplementation(() => ({ presign: mockPresign })), -})); - -jest.mock("@aws-sdk/credential-providers", () => ({ - fromNodeProviderChain: jest.fn(), -})); - const mockSendCommand = jest.fn(); const mockConnect = jest.fn().mockResolvedValue(undefined); const mockDisconnect = jest.fn().mockResolvedValue(undefined); const mockOn = jest.fn(); -jest.mock("@redis/client", () => ({ - createClient: jest.fn(() => ({ - sendCommand: mockSendCommand, - connect: mockConnect, - disconnect: mockDisconnect, - on: mockOn, - isOpen: true, - })), -})); - const defaultConfig: EndpointGateConfig = { burstCapacity: 10, cbProbeIntervalMs: 60_000, @@ -232,93 +204,3 @@ describe("recordResult", () => { expect(args[3]).toBe("cb:my-target"); }); }); - -describe("getRedisClient", () => { - beforeEach(() => { - resetRedisClient(); - delete process.env.ELASTICACHE_ENDPOINT; - delete process.env.ELASTICACHE_CACHE_NAME; - delete process.env.ELASTICACHE_IAM_USERNAME; - }); - - it("throws when ELASTICACHE_ENDPOINT is not set", async () => { - await expect(getRedisClient()).rejects.toThrow( - "ELASTICACHE_ENDPOINT is required", - ); - }); - - it("throws when ELASTICACHE_IAM_USERNAME is not set", async () => { - process.env.ELASTICACHE_ENDPOINT = "cache.example.invalid"; - - await expect(getRedisClient()).rejects.toThrow( - "ELASTICACHE_IAM_USERNAME is required", - ); - }); - - it("throws when ELASTICACHE_CACHE_NAME is not set", async () => { - process.env.ELASTICACHE_ENDPOINT = "cache.example.invalid"; - process.env.ELASTICACHE_IAM_USERNAME = "iam-user"; - - await expect(getRedisClient()).rejects.toThrow( - "ELASTICACHE_CACHE_NAME, ELASTICACHE_ENDPOINT, and ELASTICACHE_IAM_USERNAME are required", - ); - }); - - it("creates and connects a Redis client with IAM token", async () => { - process.env.ELASTICACHE_ENDPOINT = "cache.example.invalid"; - process.env.ELASTICACHE_CACHE_NAME = "my-cache"; - process.env.ELASTICACHE_IAM_USERNAME = "iam-user"; - - const client = await getRedisClient(); - 
- expect(client).toBeDefined(); - expect(mockPresign).toHaveBeenCalled(); - expect(mockConnect).toHaveBeenCalled(); - }); - - it("returns cached client when already open and token is valid", async () => { - process.env.ELASTICACHE_ENDPOINT = "cache.example.invalid"; - process.env.ELASTICACHE_CACHE_NAME = "my-cache"; - process.env.ELASTICACHE_IAM_USERNAME = "iam-user"; - - const first = await getRedisClient(); - const second = await getRedisClient(); - - expect(first).toBe(second); - expect(mockConnect).toHaveBeenCalledTimes(1); - expect(mockPresign).toHaveBeenCalledTimes(1); - }); - - it("registers error handler on client", async () => { - process.env.ELASTICACHE_ENDPOINT = "cache.example.invalid"; - process.env.ELASTICACHE_CACHE_NAME = "my-cache"; - process.env.ELASTICACHE_IAM_USERNAME = "iam-user"; - - await getRedisClient(); - - expect(mockOn).toHaveBeenCalledWith("error", expect.any(Function)); - - const errorHandler = mockOn.mock.calls.find( - (c: unknown[]) => c[0] === "error", - )![1] as (err: Error) => void; - errorHandler(new Error("test error")); - }); - - it("disconnects existing client when token expires before reconnecting", async () => { - jest.useFakeTimers(); - process.env.ELASTICACHE_ENDPOINT = "cache.example.invalid"; - process.env.ELASTICACHE_CACHE_NAME = "my-cache"; - process.env.ELASTICACHE_IAM_USERNAME = "iam-user"; - - await getRedisClient(); - - jest.advanceTimersByTime(841_000); - - await getRedisClient(); - - expect(mockDisconnect).toHaveBeenCalledTimes(1); - expect(mockConnect).toHaveBeenCalledTimes(2); - - jest.useRealTimers(); - }); -}); diff --git a/lambdas/https-client-lambda/src/__tests__/handler.test.ts b/lambdas/https-client-lambda/src/__tests__/handler.test.ts index 8b24d5e8..196fba34 100644 --- a/lambdas/https-client-lambda/src/__tests__/handler.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/handler.test.ts @@ -64,6 +64,8 @@ const mockRecordResult = jest.fn(); jest.mock("services/endpoint-gate", () => ({ admit: (...args: 
unknown[]) => mockAdmit(...args), recordResult: (...args: unknown[]) => mockRecordResult(...args), +})); +jest.mock("services/redis-client", () => ({ getRedisClient: (...args: unknown[]) => mockGetRedisClient(...args), })); diff --git a/lambdas/https-client-lambda/src/__tests__/redis-client.test.ts b/lambdas/https-client-lambda/src/__tests__/redis-client.test.ts new file mode 100644 index 00000000..3cd9513f --- /dev/null +++ b/lambdas/https-client-lambda/src/__tests__/redis-client.test.ts @@ -0,0 +1,123 @@ +import { getRedisClient, resetRedisClient } from "services/redis-client"; + +jest.mock("@nhs-notify-client-callbacks/logger"); + +const mockPresign = jest.fn().mockResolvedValue({ + hostname: "cache.example.invalid", + path: "/", + query: { "X-Amz-Signature": "mock-sig" }, +}); + +jest.mock("@smithy/signature-v4", () => ({ + SignatureV4: jest.fn().mockImplementation(() => ({ presign: mockPresign })), +})); + +jest.mock("@aws-sdk/credential-providers", () => ({ + fromNodeProviderChain: jest.fn(), +})); + +const mockSendCommand = jest.fn(); +const mockConnect = jest.fn().mockResolvedValue(undefined); +const mockDisconnect = jest.fn().mockResolvedValue(undefined); +const mockOn = jest.fn(); + +jest.mock("@redis/client", () => ({ + createClient: jest.fn(() => ({ + sendCommand: mockSendCommand, + connect: mockConnect, + disconnect: mockDisconnect, + on: mockOn, + isOpen: true, + })), +})); + +beforeEach(() => { + jest.clearAllMocks(); + resetRedisClient(); + delete process.env.ELASTICACHE_ENDPOINT; + delete process.env.ELASTICACHE_CACHE_NAME; + delete process.env.ELASTICACHE_IAM_USERNAME; +}); + +describe("getRedisClient", () => { + it("throws when ELASTICACHE_ENDPOINT is not set", async () => { + await expect(getRedisClient()).rejects.toThrow( + "ELASTICACHE_ENDPOINT is required", + ); + }); + + it("throws when ELASTICACHE_IAM_USERNAME is not set", async () => { + process.env.ELASTICACHE_ENDPOINT = "cache.example.invalid"; + + await 
expect(getRedisClient()).rejects.toThrow( + "ELASTICACHE_IAM_USERNAME is required", + ); + }); + + it("throws when ELASTICACHE_CACHE_NAME is not set", async () => { + process.env.ELASTICACHE_ENDPOINT = "cache.example.invalid"; + process.env.ELASTICACHE_IAM_USERNAME = "iam-user"; + + await expect(getRedisClient()).rejects.toThrow( + "ELASTICACHE_CACHE_NAME, ELASTICACHE_ENDPOINT, and ELASTICACHE_IAM_USERNAME are required", + ); + }); + + it("creates and connects a Redis client with IAM token", async () => { + process.env.ELASTICACHE_ENDPOINT = "cache.example.invalid"; + process.env.ELASTICACHE_CACHE_NAME = "my-cache"; + process.env.ELASTICACHE_IAM_USERNAME = "iam-user"; + + const client = await getRedisClient(); + + expect(client).toBeDefined(); + expect(mockPresign).toHaveBeenCalled(); + expect(mockConnect).toHaveBeenCalled(); + }); + + it("returns cached client when already open and token is valid", async () => { + process.env.ELASTICACHE_ENDPOINT = "cache.example.invalid"; + process.env.ELASTICACHE_CACHE_NAME = "my-cache"; + process.env.ELASTICACHE_IAM_USERNAME = "iam-user"; + + const first = await getRedisClient(); + const second = await getRedisClient(); + + expect(first).toBe(second); + expect(mockConnect).toHaveBeenCalledTimes(1); + expect(mockPresign).toHaveBeenCalledTimes(1); + }); + + it("registers error handler on client", async () => { + process.env.ELASTICACHE_ENDPOINT = "cache.example.invalid"; + process.env.ELASTICACHE_CACHE_NAME = "my-cache"; + process.env.ELASTICACHE_IAM_USERNAME = "iam-user"; + + await getRedisClient(); + + expect(mockOn).toHaveBeenCalledWith("error", expect.any(Function)); + + const errorHandler = mockOn.mock.calls.find( + (c: unknown[]) => c[0] === "error", + )![1] as (err: Error) => void; + errorHandler(new Error("test error")); + }); + + it("disconnects existing client when token expires before reconnecting", async () => { + jest.useFakeTimers(); + process.env.ELASTICACHE_ENDPOINT = "cache.example.invalid"; + 
process.env.ELASTICACHE_CACHE_NAME = "my-cache"; + process.env.ELASTICACHE_IAM_USERNAME = "iam-user"; + + await getRedisClient(); + + jest.advanceTimersByTime(841_000); + + await getRedisClient(); + + expect(mockDisconnect).toHaveBeenCalledTimes(1); + expect(mockConnect).toHaveBeenCalledTimes(2); + + jest.useRealTimers(); + }); +}); diff --git a/lambdas/https-client-lambda/src/handler.ts b/lambdas/https-client-lambda/src/handler.ts index e30d5502..c9b6e469 100644 --- a/lambdas/https-client-lambda/src/handler.ts +++ b/lambdas/https-client-lambda/src/handler.ts @@ -18,9 +18,9 @@ import { import { type EndpointGateConfig, admit, - getRedisClient, recordResult, } from "services/endpoint-gate"; +import { getRedisClient } from "services/redis-client"; import { recordAdmissionDenied, recordCircuitBreakerClosed, @@ -37,7 +37,7 @@ import { flushMetrics } from "services/delivery-metrics"; type RedisClientType = Awaited>; -const DEFAULT_MAX_RETRY_DURATION_MS = 7_200_000; +const DEFAULT_MAX_RETRY_DURATION_MS = 7_200_000; // 2 hours const DEFAULT_CONCURRENCY_LIMIT = 5; const gateConfig: EndpointGateConfig = { diff --git a/lambdas/https-client-lambda/src/services/endpoint-gate.ts b/lambdas/https-client-lambda/src/services/endpoint-gate.ts index c0dd1b60..81a98290 100644 --- a/lambdas/https-client-lambda/src/services/endpoint-gate.ts +++ b/lambdas/https-client-lambda/src/services/endpoint-gate.ts @@ -1,9 +1,5 @@ -import { type RedisClientType, createClient } from "@redis/client"; -import { SignatureV4 } from "@smithy/signature-v4"; -import { Sha256 } from "@aws-crypto/sha256-js"; -import { fromNodeProviderChain } from "@aws-sdk/credential-providers"; +import type { RedisClientType } from "services/redis-client"; import { createHash } from "node:crypto"; -import { logger } from "@nhs-notify-client-callbacks/logger"; import admitLuaSrc from "services/admit.lua"; import recordResultLuaSrc from "services/record-result.lua"; @@ -167,92 +163,3 @@ export function resetAdmitSha(): void { 
admitSha = undefined; recordResultSha = undefined; } - -const TOKEN_EXPIRY_SECONDS = 900; -const TOKEN_REFRESH_BUFFER_SECONDS = 60; - -let redisClient: RedisClientType | undefined; -let tokenExpiry = 0; - -async function generateElastiCacheIamToken(): Promise { - const cacheName = process.env.ELASTICACHE_CACHE_NAME; - const endpoint = process.env.ELASTICACHE_ENDPOINT; - const username = process.env.ELASTICACHE_IAM_USERNAME; - - if (!cacheName || !endpoint || !username) { - throw new Error( - "ELASTICACHE_CACHE_NAME, ELASTICACHE_ENDPOINT, and ELASTICACHE_IAM_USERNAME are required", - ); - } - - const region = process.env.AWS_REGION ?? "eu-west-2"; - - const signer = new SignatureV4({ - credentials: fromNodeProviderChain(), - region, - service: "elasticache", - sha256: Sha256, - }); - - const signed = await signer.presign( - { - protocol: "https:", - method: "GET", - hostname: endpoint, - path: "/", - query: { Action: "connect", User: username }, - headers: { host: endpoint }, - }, - { expiresIn: TOKEN_EXPIRY_SECONDS }, - ); - - tokenExpiry = Date.now() + TOKEN_EXPIRY_SECONDS * 1000; - - const qs = new URLSearchParams( - signed.query as Record, - ).toString(); - return `https://${signed.hostname}${signed.path}?${qs}`; -} - -export async function getRedisClient(): Promise { - const isTokenValid = - tokenExpiry > Date.now() + TOKEN_REFRESH_BUFFER_SECONDS * 1000; - - if (redisClient?.isOpen && isTokenValid) { - return redisClient; - } - - const endpoint = process.env.ELASTICACHE_ENDPOINT; - if (!endpoint) { - throw new Error("ELASTICACHE_ENDPOINT is required"); - } - - const username = process.env.ELASTICACHE_IAM_USERNAME; - if (!username) { - throw new Error("ELASTICACHE_IAM_USERNAME is required"); - } - - if (redisClient?.isOpen) { - await redisClient.disconnect(); - } - - const token = await generateElastiCacheIamToken(); - - redisClient = createClient({ - url: `rediss://${endpoint}:6379`, - username, - password: token, - }); - - redisClient.on("error", (err) => { - 
logger.error("Redis connection error", { error: String(err) }); - }); - - await redisClient.connect(); - return redisClient; -} - -export function resetRedisClient(): void { - redisClient = undefined; - tokenExpiry = 0; -} diff --git a/lambdas/https-client-lambda/src/services/redis-client.ts b/lambdas/https-client-lambda/src/services/redis-client.ts new file mode 100644 index 00000000..bfe9e29c --- /dev/null +++ b/lambdas/https-client-lambda/src/services/redis-client.ts @@ -0,0 +1,96 @@ +import { type RedisClientType, createClient } from "@redis/client"; +import { SignatureV4 } from "@smithy/signature-v4"; +import { Sha256 } from "@aws-crypto/sha256-js"; +import { fromNodeProviderChain } from "@aws-sdk/credential-providers"; +import { logger } from "@nhs-notify-client-callbacks/logger"; + +const TOKEN_EXPIRY_SECONDS = 900; +const TOKEN_REFRESH_BUFFER_SECONDS = 60; + +let redisClient: RedisClientType | undefined; +let tokenExpiry = 0; + +async function generateElastiCacheIamToken(): Promise { + const cacheName = process.env.ELASTICACHE_CACHE_NAME; + const endpoint = process.env.ELASTICACHE_ENDPOINT; + const username = process.env.ELASTICACHE_IAM_USERNAME; + + if (!cacheName || !endpoint || !username) { + throw new Error( + "ELASTICACHE_CACHE_NAME, ELASTICACHE_ENDPOINT, and ELASTICACHE_IAM_USERNAME are required", + ); + } + + const region = process.env.AWS_REGION ?? 
"eu-west-2"; + + const signer = new SignatureV4({ + credentials: fromNodeProviderChain(), + region, + service: "elasticache", + sha256: Sha256, + }); + + const signed = await signer.presign( + { + protocol: "https:", + method: "GET", + hostname: endpoint, + path: "/", + query: { Action: "connect", User: username }, + headers: { host: endpoint }, + }, + { expiresIn: TOKEN_EXPIRY_SECONDS }, + ); + + tokenExpiry = Date.now() + TOKEN_EXPIRY_SECONDS * 1000; + + const qs = new URLSearchParams( + signed.query as Record, + ).toString(); + return `https://${signed.hostname}${signed.path}?${qs}`; +} + +export async function getRedisClient(): Promise { + const isTokenValid = + tokenExpiry > Date.now() + TOKEN_REFRESH_BUFFER_SECONDS * 1000; + + if (redisClient?.isOpen && isTokenValid) { + return redisClient; + } + + const endpoint = process.env.ELASTICACHE_ENDPOINT; + if (!endpoint) { + throw new Error("ELASTICACHE_ENDPOINT is required"); + } + + const username = process.env.ELASTICACHE_IAM_USERNAME; + if (!username) { + throw new Error("ELASTICACHE_IAM_USERNAME is required"); + } + + if (redisClient?.isOpen) { + await redisClient.disconnect(); + } + + const token = await generateElastiCacheIamToken(); + + redisClient = createClient({ + url: `rediss://${endpoint}:6379`, + username, + password: token, + }); + + redisClient.on("error", (err) => { + logger.error("Redis connection error", { error: String(err) }); + }); + + await redisClient.connect(); + return redisClient; +} + +export function resetRedisClient(): void { + redisClient = undefined; + tokenExpiry = 0; +} + +export { type RedisClientType } from "@redis/client"; diff --git a/tests/integration/fixtures/subscriptions/mock-client-1.json b/tests/integration/fixtures/subscriptions/mock-client-1.json index 2542e92b..4dd6c078 100644 --- a/tests/integration/fixtures/subscriptions/mock-client-1.json +++ b/tests/integration/fixtures/subscriptions/mock-client-1.json @@ -37,10 +37,10 @@ }, "delivery": { "mtls": { - "enabled": 
false, "certPinning": { "enabled": false - } + }, + "enabled": false } }, "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", diff --git a/tests/integration/fixtures/subscriptions/mock-client-2.json b/tests/integration/fixtures/subscriptions/mock-client-2.json index 711b8f9f..50997fb6 100644 --- a/tests/integration/fixtures/subscriptions/mock-client-2.json +++ b/tests/integration/fixtures/subscriptions/mock-client-2.json @@ -22,10 +22,10 @@ }, "delivery": { "mtls": { - "enabled": false, "certPinning": { "enabled": false - } + }, + "enabled": false } }, "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", @@ -41,10 +41,10 @@ }, "delivery": { "mtls": { - "enabled": false, "certPinning": { "enabled": false - } + }, + "enabled": false } }, "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", diff --git a/tests/integration/fixtures/subscriptions/mock-client-circuit-breaker.json b/tests/integration/fixtures/subscriptions/mock-client-circuit-breaker.json index f46e2dd7..58243d3d 100644 --- a/tests/integration/fixtures/subscriptions/mock-client-circuit-breaker.json +++ b/tests/integration/fixtures/subscriptions/mock-client-circuit-breaker.json @@ -24,10 +24,10 @@ "enabled": true }, "mtls": { - "enabled": false, "certPinning": { "enabled": false - } + }, + "enabled": false } }, "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", diff --git a/tests/integration/fixtures/subscriptions/mock-client-mtls.json b/tests/integration/fixtures/subscriptions/mock-client-mtls.json index d9879679..0fce5d72 100644 --- a/tests/integration/fixtures/subscriptions/mock-client-mtls.json +++ b/tests/integration/fixtures/subscriptions/mock-client-mtls.json @@ -21,11 +21,11 @@ }, "delivery": { "mtls": { - "enabled": true, "certPinning": { "enabled": true, "spkiHash": "REPLACED_BY_TERRAFORM" - } + }, + "enabled": true } }, "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", diff --git a/tests/integration/fixtures/subscriptions/mock-client-rate-limit.json 
b/tests/integration/fixtures/subscriptions/mock-client-rate-limit.json index 35271dce..21e53636 100644 --- a/tests/integration/fixtures/subscriptions/mock-client-rate-limit.json +++ b/tests/integration/fixtures/subscriptions/mock-client-rate-limit.json @@ -21,10 +21,10 @@ }, "delivery": { "mtls": { - "enabled": false, "certPinning": { "enabled": false - } + }, + "enabled": false } }, "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", From 7297c184b9916d9d4c2235e7e741c1d5acf5bef2 Mon Sep 17 00:00:00 2001 From: Rhys Cox Date: Tue, 21 Apr 2026 11:05:03 +0100 Subject: [PATCH 21/65] CCM-16073 - PR feedback --- .../callbacks/elasticache_delivery_state.tf | 40 ++++++++++++ .../modules/client-delivery/README.md | 2 +- .../modules/client-delivery/variables.tf | 2 +- .../src/__tests__/handler.test.ts | 63 +++++++++++++++++++ lambdas/https-client-lambda/src/handler.ts | 17 ++++- .../config/vocabularies/words/accept.txt | 3 +- 6 files changed, 121 insertions(+), 6 deletions(-) diff --git a/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf b/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf index c2f8fa86..58a675aa 100644 --- a/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf +++ b/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf @@ -1,3 +1,41 @@ +resource "aws_elasticache_user" "delivery_state_default" { + user_id = "${local.csi}-delivery-state-default" + user_name = "default" + engine = "valkey" + access_string = "off -@all" + + authentication_mode { + type = "no-password-required" + } + + tags = local.default_tags +} + +resource "aws_elasticache_user" "delivery_state_iam" { + user_id = "${local.csi}-elasticache-user" + user_name = "${local.csi}-elasticache-user" + engine = "valkey" + access_string = "on ~* &* +@all" + + authentication_mode { + type = "iam" + } + + tags = local.default_tags +} + +resource "aws_elasticache_user_group" "delivery_state" { + engine = 
"valkey" + user_group_id = "${local.csi}-delivery-state" + + user_ids = [ + aws_elasticache_user.delivery_state_default.user_id, + aws_elasticache_user.delivery_state_iam.user_id, + ] + + tags = local.default_tags +} + resource "aws_elasticache_serverless_cache" "delivery_state" { name = "${local.csi}-delivery-state" engine = "valkey" @@ -6,6 +44,8 @@ resource "aws_elasticache_serverless_cache" "delivery_state" { snapshot_retention_limit = 0 + user_group_id = aws_elasticache_user_group.delivery_state.user_group_id + security_group_ids = [aws_security_group.elasticache_delivery_state.id] subnet_ids = try(local.acct.private_subnets[local.bc_name], []) diff --git a/infrastructure/terraform/modules/client-delivery/README.md b/infrastructure/terraform/modules/client-delivery/README.md index 0552f282..0a4965e7 100644 --- a/infrastructure/terraform/modules/client-delivery/README.md +++ b/infrastructure/terraform/modules/client-delivery/README.md @@ -41,7 +41,7 @@ No requirements. | [mtls\_test\_cert\_s3\_key](#input\_mtls\_test\_cert\_s3\_key) | S3 key for dev mTLS test certificate bundle | `string` | `""` | no | | [project](#input\_project) | The name of the tfscaffold project | `string` | n/a | yes | | [region](#input\_region) | AWS Region | `string` | n/a | yes | -| [sqs\_max\_receive\_count](#input\_sqs\_max\_receive\_count) | Maximum receive count before message moves to DLQ | `number` | `100` | no | +| [sqs\_max\_receive\_count](#input\_sqs\_max\_receive\_count) | Safety-net maximum receive count before a message moves to DLQ. Supplements the time-based retry window for cases where the Lambda fails before reaching the window check. | `number` | `100` | no | | [sqs\_visibility\_timeout\_seconds](#input\_sqs\_visibility\_timeout\_seconds) | Visibility timeout for the per-client delivery queue | `number` | `60` | no | | [subscription\_targets](#input\_subscription\_targets) | Flattened subscription-target fanout map keyed by subscription-target composite key |
map(object({
subscription_id = string
target_id = string
}))
| n/a | yes | | [subscriptions](#input\_subscriptions) | Subscription definitions for this client, keyed by subscription\_id |
map(object({
subscription_id = string
target_ids = list(string)
}))
| n/a | yes | diff --git a/infrastructure/terraform/modules/client-delivery/variables.tf b/infrastructure/terraform/modules/client-delivery/variables.tf index aabdcda6..643e163e 100644 --- a/infrastructure/terraform/modules/client-delivery/variables.tf +++ b/infrastructure/terraform/modules/client-delivery/variables.tf @@ -147,7 +147,7 @@ variable "sqs_visibility_timeout_seconds" { variable "sqs_max_receive_count" { type = number - description = "Maximum receive count before message moves to DLQ" + description = "Safety-net maximum receive count before a message moves to DLQ. Supplements the time-based retry window for cases where the Lambda fails before reaching the window check." default = 100 } diff --git a/lambdas/https-client-lambda/src/__tests__/handler.test.ts b/lambdas/https-client-lambda/src/__tests__/handler.test.ts index 196fba34..2b758c1b 100644 --- a/lambdas/https-client-lambda/src/__tests__/handler.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/handler.test.ts @@ -197,6 +197,40 @@ describe("processRecords", () => { expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); expect(mockDeliverPayload).toHaveBeenCalledTimes(1); + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 5); + }); + + it("applies jittered backoff cooldown on unexpected errors", async () => { + mockLoadTargetConfig.mockRejectedValue(new Error("Infrastructure error")); + + const failures = await processRecords([makeRecord()]); + + expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 5); + }); + + it("does not apply a second visibility change for admission-denied (managed path)", async () => { + mockAdmit.mockResolvedValue({ + allowed: false, + reason: "rate_limited", + retryAfterMs: 2000, + effectiveRate: 10, + }); + + await processRecords([makeRecord()]); + + expect(mockChangeVisibility).toHaveBeenCalledTimes(1); + }); + + it("does not apply a second visibility change for transient failure 
(managed path)", async () => { + mockDeliverPayload.mockResolvedValue({ + outcome: "transient_failure", + statusCode: 503, + }); + + await processRecords([makeRecord()]); + + expect(mockChangeVisibility).toHaveBeenCalledTimes(1); }); it("returns failure when CLIENT_ID is not set", async () => { @@ -249,6 +283,18 @@ describe("processRecords", () => { ); }); + it("returns no failure when handleRateLimitedRecord resolves (e.g. DLQ path)", async () => { + mockDeliverPayload.mockResolvedValue({ + outcome: "rate_limited", + retryAfterHeader: "99999", + }); + mockHandleRateLimitedRecord.mockResolvedValue(undefined); + + const failures = await processRecords([makeRecord()]); + + expect(failures).toEqual([]); + }); + it("requeues when rate limited by endpoint gate", async () => { mockAdmit.mockResolvedValue({ allowed: false, @@ -470,4 +516,21 @@ describe("processRecords", () => { expect(emitRateLimited).toHaveBeenCalledWith("target-1"); }); + + it("uses configured maxRetryDurationSeconds when set on target", async () => { + const targetWithRetry = { + ...DEFAULT_TARGET, + delivery: { ...DEFAULT_TARGET.delivery, maxRetryDurationSeconds: 3600 }, + }; + mockLoadTargetConfig.mockResolvedValue(targetWithRetry); + mockIsWindowExhausted.mockReturnValue(false); + + const failures = await processRecords([makeRecord()]); + + expect(failures).toEqual([]); + expect(mockIsWindowExhausted).toHaveBeenCalledWith( + expect.any(Number), + 3_600_000, + ); + }); }); diff --git a/lambdas/https-client-lambda/src/handler.ts b/lambdas/https-client-lambda/src/handler.ts index c9b6e469..5fd97c31 100644 --- a/lambdas/https-client-lambda/src/handler.ts +++ b/lambdas/https-client-lambda/src/handler.ts @@ -40,6 +40,8 @@ type RedisClientType = Awaited>; const DEFAULT_MAX_RETRY_DURATION_MS = 7_200_000; // 2 hours const DEFAULT_CONCURRENCY_LIMIT = 5; +class VisibilityManagedError extends Error {} + const gateConfig: EndpointGateConfig = { burstCapacity: Number(process.env.TOKEN_BUCKET_BURST_CAPACITY ?? 
"10"), cbProbeIntervalMs: Number(process.env.CB_PROBE_INTERVAL_MS ?? "60000"), @@ -76,7 +78,7 @@ async function checkAdmission( const delaySec = Math.ceil(gateResult.retryAfterMs / 1000); recordAdmissionDenied(clientId, targetId, gateResult.reason); await changeVisibility(record.receiptHandle, delaySec); - throw new Error(`Admission denied: ${gateResult.reason}`); + throw new VisibilityManagedError(`Admission denied: ${gateResult.reason}`); } } @@ -128,7 +130,7 @@ async function handleDeliveryResult( } recordDeliveryFailure(clientId, targetId, result.statusCode, backoffSec); await changeVisibility(record.receiptHandle, backoffSec); - throw new Error(`Transient failure: ${result.statusCode}`); + throw new VisibilityManagedError(`Transient failure: ${result.statusCode}`); } async function processRecord( @@ -211,7 +213,16 @@ export async function processRecords( try { await processRecord(record, redis); return null; - } catch { + } catch (error) { + if (!(error instanceof VisibilityManagedError)) { + const receiveCount = Number( + record.attributes.ApproximateReceiveCount, + ); + await changeVisibility( + record.receiptHandle, + jitteredBackoffSeconds(receiveCount), + ); + } return { itemIdentifier: record.messageId }; } }, diff --git a/scripts/config/vale/styles/config/vocabularies/words/accept.txt b/scripts/config/vale/styles/config/vocabularies/words/accept.txt index ed5cb1a6..535b5e1d 100644 --- a/scripts/config/vale/styles/config/vocabularies/words/accept.txt +++ b/scripts/config/vale/styles/config/vocabularies/words/accept.txt @@ -28,12 +28,13 @@ npm OAuth Octokit onboarding +pnpm Podman Python queryable rawContent read_file -repo +[rR][eE][pP][oO] [Rr]unbook sed Syft From 0ab926d96d9bd2e40db720c73238500278806621 Mon Sep 17 00:00:00 2001 From: Rhys Cox Date: Wed, 22 Apr 2026 08:15:16 +0100 Subject: [PATCH 22/65] CCM-16073 - PR feedback --- .../src/__tests__/handler.test.ts | 7 ++++- .../src/__tests__/retry-policy.test.ts | 3 +- 
lambdas/https-client-lambda/src/handler.ts | 16 +++++++---- .../src/services/delivery/https-client.ts | 28 ++++++++++++------- .../src/services/delivery/retry-policy.ts | 3 +- .../src/services/visibility-managed-error.ts | 1 + 6 files changed, 39 insertions(+), 19 deletions(-) create mode 100644 lambdas/https-client-lambda/src/services/visibility-managed-error.ts diff --git a/lambdas/https-client-lambda/src/__tests__/handler.test.ts b/lambdas/https-client-lambda/src/__tests__/handler.test.ts index 2b758c1b..a31cc61c 100644 --- a/lambdas/https-client-lambda/src/__tests__/handler.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/handler.test.ts @@ -3,6 +3,7 @@ import { DEFAULT_TARGET, makeRecord, } from "__tests__/fixtures/handler-fixtures"; +import { VisibilityManagedError } from "services/visibility-managed-error"; jest.mock("@nhs-notify-client-callbacks/logger", () => ({ logger: { @@ -36,6 +37,10 @@ jest.mock("services/delivery/tls-agent-factory", () => ({ const mockDeliverPayload = jest.fn(); jest.mock("services/delivery/https-client", () => ({ deliverPayload: (...args: unknown[]) => mockDeliverPayload(...args), + OUTCOME_SUCCESS: "success", + OUTCOME_PERMANENT_FAILURE: "permanent_failure", + OUTCOME_RATE_LIMITED: "rate_limited", + OUTCOME_TRANSIENT_FAILURE: "transient_failure", })); const mockSendToDlq = jest.fn(); @@ -100,7 +105,7 @@ describe("processRecords", () => { mockJitteredBackoff.mockReturnValue(5); mockIsWindowExhausted.mockReturnValue(false); mockHandleRateLimitedRecord.mockRejectedValue( - new Error("Rate limited — requeue"), + new VisibilityManagedError("Rate limited — requeue"), ); mockGetRedisClient.mockResolvedValue({}); mockAdmit.mockResolvedValue({ diff --git a/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts b/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts index 97bb734b..de828762 100644 --- a/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts +++ 
b/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts @@ -6,6 +6,7 @@ import { jitteredBackoffSeconds, parseRetryAfter, } from "services/delivery/retry-policy"; +import { VisibilityManagedError } from "services/visibility-managed-error"; const mockSendToDlq = jest.fn(); jest.mock("services/dlq-sender", () => ({ @@ -170,6 +171,6 @@ describe("handleRateLimitedRecord", () => { it("throws after requeuing so SQS marks the record as failed", async () => { await expect( handleRateLimitedRecord(makeRecord(), "client-1", "target-1", "30", 1), - ).rejects.toThrow("Rate limited — requeue"); + ).rejects.toThrow(VisibilityManagedError); }); }); diff --git a/lambdas/https-client-lambda/src/handler.ts b/lambdas/https-client-lambda/src/handler.ts index 5fd97c31..f552ea0d 100644 --- a/lambdas/https-client-lambda/src/handler.ts +++ b/lambdas/https-client-lambda/src/handler.ts @@ -6,7 +6,12 @@ import { loadTargetConfig } from "services/config-loader"; import { getApplicationId } from "services/ssm-applications-map"; import { signPayload } from "services/payload-signer"; import { buildAgent } from "services/delivery/tls-agent-factory"; -import { deliverPayload } from "services/delivery/https-client"; +import { + OUTCOME_PERMANENT_FAILURE, + OUTCOME_RATE_LIMITED, + OUTCOME_SUCCESS, + deliverPayload, +} from "services/delivery/https-client"; import type { DeliveryResult } from "services/delivery/https-client"; import { sendToDlq } from "services/dlq-sender"; import { changeVisibility } from "services/sqs-visibility"; @@ -21,6 +26,7 @@ import { recordResult, } from "services/endpoint-gate"; import { getRedisClient } from "services/redis-client"; +import { VisibilityManagedError } from "services/visibility-managed-error"; import { recordAdmissionDenied, recordCircuitBreakerClosed, @@ -40,8 +46,6 @@ type RedisClientType = Awaited>; const DEFAULT_MAX_RETRY_DURATION_MS = 7_200_000; // 2 hours const DEFAULT_CONCURRENCY_LIMIT = 5; -class VisibilityManagedError extends Error {} - 
const gateConfig: EndpointGateConfig = { burstCapacity: Number(process.env.TOKEN_BUCKET_BURST_CAPACITY ?? "10"), cbProbeIntervalMs: Number(process.env.CB_PROBE_INTERVAL_MS ?? "60000"), @@ -90,7 +94,7 @@ async function handleDeliveryResult( targetId: string, cbEnabled: boolean, ): Promise { - if (result.outcome === "success") { + if (result.outcome === OUTCOME_SUCCESS) { if (cbEnabled) { const cbOutcome = await recordResult(redis, targetId, true, gateConfig); if (cbOutcome.ok && cbOutcome.state === "closed") { @@ -101,13 +105,13 @@ async function handleDeliveryResult( return; } - if (result.outcome === "permanent_failure") { + if (result.outcome === OUTCOME_PERMANENT_FAILURE) { recordDeliveryPermanentFailure(clientId, targetId); await sendToDlq(record.body); return; } - if (result.outcome === "rate_limited") { + if (result.outcome === OUTCOME_RATE_LIMITED) { const receiveCount = Number(record.attributes.ApproximateReceiveCount); recordDeliveryRateLimited(clientId, targetId); await handleRateLimitedRecord( diff --git a/lambdas/https-client-lambda/src/services/delivery/https-client.ts b/lambdas/https-client-lambda/src/services/delivery/https-client.ts index c651fe6d..418d7563 100644 --- a/lambdas/https-client-lambda/src/services/delivery/https-client.ts +++ b/lambdas/https-client-lambda/src/services/delivery/https-client.ts @@ -3,11 +3,19 @@ import type { Agent } from "node:https"; import type { CallbackTarget } from "@nhs-notify-client-callbacks/models"; import { PERMANENT_TLS_ERROR_CODES } from "services/delivery/tls-agent-factory"; +export const OUTCOME_SUCCESS = "success" as const; +export const OUTCOME_PERMANENT_FAILURE = "permanent_failure" as const; +export const OUTCOME_RATE_LIMITED = "rate_limited" as const; +export const OUTCOME_TRANSIENT_FAILURE = "transient_failure" as const; + export type DeliveryResult = - | { outcome: "success" } - | { outcome: "permanent_failure" } - | { outcome: "rate_limited"; retryAfterHeader: string | undefined } - | { outcome: 
"transient_failure"; statusCode: number }; + | { outcome: typeof OUTCOME_SUCCESS } + | { outcome: typeof OUTCOME_PERMANENT_FAILURE } + | { + outcome: typeof OUTCOME_RATE_LIMITED; + retryAfterHeader: string | undefined; + } + | { outcome: typeof OUTCOME_TRANSIENT_FAILURE; statusCode: number }; export function deliverPayload( target: CallbackTarget, @@ -38,25 +46,25 @@ export function deliverPayload( const statusCode = res.statusCode ?? 0; if (statusCode >= 200 && statusCode < 300) { - resolve({ outcome: "success" }); + resolve({ outcome: OUTCOME_SUCCESS }); return; } if (statusCode === 429) { const retryAfterHeader = res.headers["retry-after"]; resolve({ - outcome: "rate_limited", + outcome: OUTCOME_RATE_LIMITED, retryAfterHeader, }); return; } if (statusCode >= 400 && statusCode < 500) { - resolve({ outcome: "permanent_failure" }); + resolve({ outcome: OUTCOME_PERMANENT_FAILURE }); return; } - resolve({ outcome: "transient_failure", statusCode }); + resolve({ outcome: OUTCOME_TRANSIENT_FAILURE, statusCode }); }, ); @@ -66,11 +74,11 @@ export function deliverPayload( req.on("error", (error: NodeJS.ErrnoException) => { if (error.code && PERMANENT_TLS_ERROR_CODES.has(error.code)) { - resolve({ outcome: "permanent_failure" }); + resolve({ outcome: OUTCOME_PERMANENT_FAILURE }); return; } - resolve({ outcome: "transient_failure", statusCode: 0 }); + resolve({ outcome: OUTCOME_TRANSIENT_FAILURE, statusCode: 0 }); }); req.end(signedPayloadJson); diff --git a/lambdas/https-client-lambda/src/services/delivery/retry-policy.ts b/lambdas/https-client-lambda/src/services/delivery/retry-policy.ts index 2bc7cd12..2bde6516 100644 --- a/lambdas/https-client-lambda/src/services/delivery/retry-policy.ts +++ b/lambdas/https-client-lambda/src/services/delivery/retry-policy.ts @@ -2,6 +2,7 @@ import type { SQSRecord } from "aws-lambda"; import { logger } from "@nhs-notify-client-callbacks/logger"; import { sendToDlq } from "services/dlq-sender"; import { changeVisibility } from 
"services/sqs-visibility"; +import { VisibilityManagedError } from "services/visibility-managed-error"; const BACKOFF_CAP_SECONDS = 300; const SQS_MAX_VISIBILITY_SECONDS = 43_200; @@ -75,5 +76,5 @@ export async function handleRateLimitedRecord( delaySec, }); await changeVisibility(record.receiptHandle, delaySec); - throw new Error("Rate limited — requeue"); + throw new VisibilityManagedError("Rate limited — requeue"); } diff --git a/lambdas/https-client-lambda/src/services/visibility-managed-error.ts b/lambdas/https-client-lambda/src/services/visibility-managed-error.ts new file mode 100644 index 00000000..403c2162 --- /dev/null +++ b/lambdas/https-client-lambda/src/services/visibility-managed-error.ts @@ -0,0 +1 @@ +export class VisibilityManagedError extends Error {} From dc4343a857a14729b5a0673ebd9ffd25a9e93b77 Mon Sep 17 00:00:00 2001 From: Rhys Cox Date: Wed, 22 Apr 2026 08:39:11 +0100 Subject: [PATCH 23/65] CCM-16073 - PR feedback --- .../__tests__/client-config-schema.test.ts | 26 ++++++++-- src/models/src/client-config-schema.ts | 2 +- .../client-subscription-builder.test.ts | 47 +++++++++++++++++++ .../src/domain/client-subscription-builder.ts | 13 +++++ 4 files changed, 83 insertions(+), 5 deletions(-) diff --git a/src/models/src/__tests__/client-config-schema.test.ts b/src/models/src/__tests__/client-config-schema.test.ts index d9d68a40..fa90a061 100644 --- a/src/models/src/__tests__/client-config-schema.test.ts +++ b/src/models/src/__tests__/client-config-schema.test.ts @@ -226,9 +226,20 @@ describe("parseClientSubscriptionConfiguration", () => { ); }); - it("returns a failed parse result when maxRetryDurationSeconds is below 60", () => { + it("returns a failed parse result when maxRetryDurationSeconds is zero", () => { const config = createValidConfig(); - config.targets[0].delivery = { maxRetryDurationSeconds: 59 }; + config.targets[0].delivery = { maxRetryDurationSeconds: 0 }; + + const result = expectFailedParse( + 
parseClientSubscriptionConfiguration(config), + ); + + expect(result.success).toBe(false); + }); + + it("returns a failed parse result when maxRetryDurationSeconds is negative", () => { + const config = createValidConfig(); + config.targets[0].delivery = { maxRetryDurationSeconds: -1 }; const result = expectFailedParse( parseClientSubscriptionConfiguration(config), @@ -248,9 +259,16 @@ describe("parseClientSubscriptionConfiguration", () => { expect(result.success).toBe(false); }); - it("accepts maxRetryDurationSeconds at boundary value 60", () => { + it("accepts maxRetryDurationSeconds below 60", () => { + const config = createValidConfig(); + config.targets[0].delivery = { maxRetryDurationSeconds: 10 }; + + expect(parseClientSubscriptionConfiguration(config).success).toBe(true); + }); + + it("accepts maxRetryDurationSeconds at boundary value 1", () => { const config = createValidConfig(); - config.targets[0].delivery = { maxRetryDurationSeconds: 60 }; + config.targets[0].delivery = { maxRetryDurationSeconds: 1 }; expect(parseClientSubscriptionConfiguration(config).success).toBe(true); }); diff --git a/src/models/src/client-config-schema.ts b/src/models/src/client-config-schema.ts index cdc941f1..cae4587a 100644 --- a/src/models/src/client-config-schema.ts +++ b/src/models/src/client-config-schema.ts @@ -48,7 +48,7 @@ const targetSchema = z.object({ }), delivery: z .object({ - maxRetryDurationSeconds: z.number().min(60).max(43_200).optional(), + maxRetryDurationSeconds: z.number().positive().max(43_200).optional(), circuitBreaker: z .object({ enabled: z.boolean(), diff --git a/tools/client-subscriptions-management/src/__tests__/domain/client-subscription-builder.test.ts b/tools/client-subscriptions-management/src/__tests__/domain/client-subscription-builder.test.ts index 0ec7c4fc..edc4b857 100644 --- a/tools/client-subscriptions-management/src/__tests__/domain/client-subscription-builder.test.ts +++ 
b/tools/client-subscriptions-management/src/__tests__/domain/client-subscription-builder.test.ts @@ -116,6 +116,53 @@ describe("buildTarget", () => { expect(warnSpy).not.toHaveBeenCalled(); }); + + it("emits warning when maxRetryDurationSeconds is below 60", () => { + buildTarget({ + apiEndpoint: "https://example.com/webhook", + apiKey: "secret", + rateLimit: 10, + maxRetryDurationSeconds: 30, + }); + + expect(warnSpy).toHaveBeenCalledWith( + expect.stringContaining("maxRetryDurationSeconds is 30s"), + ); + }); + + it("does not emit warning when maxRetryDurationSeconds is 60 or above", () => { + buildTarget({ + apiEndpoint: "https://example.com/webhook", + apiKey: "secret", + rateLimit: 10, + maxRetryDurationSeconds: 60, + mtls: { enabled: true }, + certPinning: { enabled: true, spkiHash: "abc123" }, + }); + + expect(warnSpy).not.toHaveBeenCalled(); + }); + + it("includes maxRetryDurationSeconds in delivery when provided", () => { + const result = buildTarget({ + apiEndpoint: "https://example.com/webhook", + apiKey: "secret", + rateLimit: 10, + maxRetryDurationSeconds: 3600, + }); + + expect(result.delivery?.maxRetryDurationSeconds).toBe(3600); + }); + + it("omits maxRetryDurationSeconds from delivery when not provided", () => { + const result = buildTarget({ + apiEndpoint: "https://example.com/webhook", + apiKey: "secret", + rateLimit: 10, + }); + + expect(result.delivery).not.toHaveProperty("maxRetryDurationSeconds"); + }); }); describe("buildMessageStatusSubscription", () => { it("builds message status subscription", () => { diff --git a/tools/client-subscriptions-management/src/domain/client-subscription-builder.ts b/tools/client-subscriptions-management/src/domain/client-subscription-builder.ts index 288c0d72..514c48d7 100644 --- a/tools/client-subscriptions-management/src/domain/client-subscription-builder.ts +++ b/tools/client-subscriptions-management/src/domain/client-subscription-builder.ts @@ -14,6 +14,7 @@ export type BuildTargetArgs = { apiKey: string; 
apiKeyHeaderName?: string; rateLimit: number; + maxRetryDurationSeconds?: number; mtls?: { enabled: boolean }; certPinning?: { enabled: boolean; spkiHash?: string }; }; @@ -56,6 +57,15 @@ export function buildTarget(args: BuildTargetArgs): CallbackTarget { warnings.push("Certificate pinning is enabled but mTLS is disabled"); } + if ( + args.maxRetryDurationSeconds !== undefined && + args.maxRetryDurationSeconds < 60 + ) { + warnings.push( + `maxRetryDurationSeconds is ${args.maxRetryDurationSeconds}s — values below 60s may exhaust the retry window before a single delivery attempt completes`, + ); + } + for (const warning of warnings) { console.warn(pc.bold(pc.red(`WARNING: ${warning}`))); } @@ -71,6 +81,9 @@ export function buildTarget(args: BuildTargetArgs): CallbackTarget { headerValue: args.apiKey, }, delivery: { + ...(args.maxRetryDurationSeconds !== undefined && { + maxRetryDurationSeconds: args.maxRetryDurationSeconds, + }), mtls: { ...mtls, certPinning, From b25a82224fed0cd71716709de0cbf45930c4d275 Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Wed, 22 Apr 2026 09:40:17 +0100 Subject: [PATCH 24/65] CCM-16073 - Integration test fixes (#152) * ALB/webhook uses https for mTLS and non mTLS - remove http endpoint * Log thrown errors in http-client-lambda * Update int test debug script * Update SQS to webhook int tests to use correct queues * Update debug int script README * Fix redis script error and better logging for redis errors * Log status code of perm failures * fix: load test CA for server trust when mtls is disabled In test environments, the mock webhook ALB uses a server certificate signed by a locally-generated test CA. Previously, the CA was only loaded into the TLS agent when mtls.enabled was true (needed for client certificate auth). Targets with mtls.enabled: false used Node's default trust store, which does not include the test CA, causing every delivery attempt to fail with SELF_SIGNED_CERT_IN_CHAIN. 
Fix by loading the CA whenever MTLS_TEST_CA_S3_KEY is set, regardless of mtls.enabled. The client key and cert are still only applied when mtls.enabled is true. MTLS_TEST_CA_S3_KEY is not set in production, so non-mTLS targets in production are unaffected. * fix: set ERROR_CODE and ERROR_MESSAGE on DLQ messages for permanent delivery failures AWS API Destinations previously set these SQS message attributes automatically. After the migration to the https-client lambda, they were no longer being set. - Read the response body for 4xx responses (previously discarded with res.resume()) so the error message can be included in the DLQ message attributes - Set ERROR_CODE=HTTP_CLIENT_ERROR for 4xx webhook rejections, or the TLS error code (e.g. CERT_HAS_EXPIRED) for connection-level failures - Set ERROR_MESSAGE to the message field from the JSON response body, falling back to the raw body if not valid JSON - Extended sendToDlq to accept and forward MessageAttributes to SQS * Fix redrive IT dlq names * Fix metric IT dlq names * Fix alarm test * Bump test coverage * Fix redis client IAM / connectivity issues + logging improvements --- .../callbacks/elasticache_delivery_state.tf | 10 ++- .../terraform/components/callbacks/locals.tf | 2 +- .../callbacks/module_mock_webhook_alb_mtls.tf | 27 +------ .../src/__tests__/dlq-sender.test.ts | 77 +++++++++++++++++++ .../src/__tests__/endpoint-gate.test.ts | 60 ++++++++++----- .../src/__tests__/handler.test.ts | 21 ++++- .../src/__tests__/https-client.test.ts | 68 ++++++++++++++-- .../src/__tests__/tls-agent-factory.test.ts | 28 +++++++ lambdas/https-client-lambda/src/handler.ts | 13 +++- .../src/services/delivery-observability.ts | 4 + .../src/services/delivery/https-client.ts | 27 +++++-- .../services/delivery/tls-agent-factory.ts | 12 ++- .../src/services/dlq-sender.ts | 57 +++++++++++++- .../src/services/endpoint-gate.ts | 13 +++- .../src/services/redis-client.ts | 18 ++++- scripts/tests/integration-debug.sh | 72 ++++++++++------- 
scripts/tests/test.mk | 2 +- tests/integration/README.md | 29 +++++-- tests/integration/dlq-alarms.test.ts | 35 +++++---- tests/integration/dlq-redrive.test.ts | 24 +++--- .../integration/helpers/mock-client-config.ts | 23 ------ tests/integration/helpers/sqs.ts | 15 ++-- .../inbound-sqs-to-webhook.test.ts | 16 +++- tests/integration/metrics.test.ts | 4 +- 24 files changed, 489 insertions(+), 168 deletions(-) diff --git a/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf b/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf index 58a675aa..6b5d3da1 100644 --- a/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf +++ b/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf @@ -1,11 +1,17 @@ +resource "random_password" "elasticache_default_user" { + length = 32 + special = false +} + resource "aws_elasticache_user" "delivery_state_default" { - user_id = "${local.csi}-delivery-state-default" + user_id = "${local.csi}-valkey-default" user_name = "default" engine = "valkey" access_string = "off -@all" authentication_mode { - type = "no-password-required" + type = "password" + passwords = [random_password.elasticache_default_user.result] } tags = local.default_tags diff --git a/infrastructure/terraform/components/callbacks/locals.tf b/infrastructure/terraform/components/callbacks/locals.tf index 64bd622c..d80b5b7e 100644 --- a/infrastructure/terraform/components/callbacks/locals.tf +++ b/infrastructure/terraform/components/callbacks/locals.tf @@ -21,7 +21,7 @@ locals { targets = [ for target in try(client.targets, []) : merge(target, { - invocationEndpoint = try(target.delivery.mtls.enabled, false) ? 
"https://${aws_lb.mock_webhook_mtls[0].dns_name}/${target.targetId}" : "http://${aws_lb.mock_webhook_mtls[0].dns_name}/${target.targetId}" + invocationEndpoint = "https://${aws_lb.mock_webhook_mtls[0].dns_name}/${target.targetId}" apiKey = merge(target.apiKey, { headerValue = random_password.mock_webhook_api_key[0].result }) }) ] diff --git a/infrastructure/terraform/components/callbacks/module_mock_webhook_alb_mtls.tf b/infrastructure/terraform/components/callbacks/module_mock_webhook_alb_mtls.tf index 7e7badf8..eb8b6776 100644 --- a/infrastructure/terraform/components/callbacks/module_mock_webhook_alb_mtls.tf +++ b/infrastructure/terraform/components/callbacks/module_mock_webhook_alb_mtls.tf @@ -19,18 +19,7 @@ resource "aws_vpc_security_group_ingress_rule" "mock_webhook_alb_https" { from_port = 443 to_port = 443 ip_protocol = "tcp" - description = "Allow HTTPS Client Lambda to reach mock webhook via mTLS" - tags = local.default_tags -} - -resource "aws_vpc_security_group_ingress_rule" "mock_webhook_alb_http" { - count = var.deploy_mock_clients ? 1 : 0 - security_group_id = aws_security_group.mock_webhook_alb[0].id - referenced_security_group_id = aws_security_group.https_client_lambda.id - from_port = 80 - to_port = 80 - ip_protocol = "tcp" - description = "Allow HTTPS Client Lambda to reach mock webhook without mTLS" + description = "Allow HTTPS Client Lambda to reach mock webhook (mTLS and non-mTLS)" tags = local.default_tags } @@ -102,17 +91,3 @@ resource "aws_lb_listener" "mock_webhook_mtls" { tags = local.default_tags } - -resource "aws_lb_listener" "mock_webhook_http" { - count = var.deploy_mock_clients ? 
1 : 0 - load_balancer_arn = aws_lb.mock_webhook_mtls[0].arn - port = 80 - protocol = "HTTP" - - default_action { - type = "forward" - target_group_arn = aws_lb_target_group.mock_webhook_mtls[0].arn - } - - tags = local.default_tags -} diff --git a/lambdas/https-client-lambda/src/__tests__/dlq-sender.test.ts b/lambdas/https-client-lambda/src/__tests__/dlq-sender.test.ts index 21ae3700..692e41c9 100644 --- a/lambdas/https-client-lambda/src/__tests__/dlq-sender.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/dlq-sender.test.ts @@ -54,4 +54,81 @@ describe("sendToDlq", () => { process.env.DLQ_URL = saved; }); + + it("includes ERROR_CODE and ERROR_MESSAGE for HTTP error with JSON body", async () => { + mockSend.mockResolvedValue({}); + + await sendToDlq('{"test":"message"}', { + statusCode: 400, + responseBody: JSON.stringify({ message: "Bad request" }), + }); + + const command = mockSend.mock.calls[0][0]; + expect(command).toBeInstanceOf(SendMessageCommand); + expect(command.input.MessageAttributes).toEqual({ + ERROR_CODE: { DataType: "String", StringValue: "HTTP_CLIENT_ERROR" }, + ERROR_MESSAGE: { DataType: "String", StringValue: "Bad request" }, + }); + }); + + it("uses raw response body as ERROR_MESSAGE when not valid JSON", async () => { + mockSend.mockResolvedValue({}); + + await sendToDlq('{"test":"message"}', { + statusCode: 400, + responseBody: "Bad request", + }); + + const command = mockSend.mock.calls[0][0]; + expect(command.input.MessageAttributes).toEqual({ + ERROR_CODE: { DataType: "String", StringValue: "HTTP_CLIENT_ERROR" }, + ERROR_MESSAGE: { DataType: "String", StringValue: "Bad request" }, + }); + }); + + it("uses errorCode as ERROR_CODE when provided", async () => { + mockSend.mockResolvedValue({}); + + await sendToDlq('{"test":"message"}', { + errorCode: "CERT_HAS_EXPIRED", + }); + + const command = mockSend.mock.calls[0][0]; + expect(command.input.MessageAttributes).toEqual({ + ERROR_CODE: { DataType: "String", StringValue: 
"CERT_HAS_EXPIRED" }, + }); + }); + + it("sends empty MessageAttributes when errorInfo has no relevant fields", async () => { + mockSend.mockResolvedValue({}); + + await sendToDlq('{"test":"message"}', {}); + + const command = mockSend.mock.calls[0][0]; + expect(command.input.MessageAttributes).toEqual({}); + }); + + it("sends no MessageAttributes when errorInfo is omitted", async () => { + mockSend.mockResolvedValue({}); + + await sendToDlq('{"test":"message"}'); + + const command = mockSend.mock.calls[0][0]; + expect(command.input.MessageAttributes).toBeUndefined(); + }); + + it("uses JSON body message field when present in responseBody", async () => { + mockSend.mockResolvedValue({}); + + await sendToDlq('{"test":"message"}', { + statusCode: 422, + responseBody: JSON.stringify({ message: "Validation failed", code: 42 }), + }); + + const command = mockSend.mock.calls[0][0]; + expect(command.input.MessageAttributes?.ERROR_MESSAGE).toEqual({ + DataType: "String", + StringValue: "Validation failed", + }); + }); }); diff --git a/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts b/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts index 84984c71..efbc6d88 100644 --- a/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts @@ -98,14 +98,6 @@ describe("admit", () => { ); }); - it("propagates non-NOSCRIPT Redis errors", async () => { - mockSendCommand.mockRejectedValueOnce(new Error("Connection refused")); - - await expect( - admit(mockRedis, "target-1", 10, true, defaultConfig), - ).rejects.toThrow("Connection refused"); - }); - it("passes cbProbeIntervalMs=0 when circuit breaker is disabled", async () => { mockSendCommand.mockResolvedValueOnce([1, "allowed", 0, 10]); @@ -123,8 +115,46 @@ describe("admit", () => { await admit(mockRedis, "my-target", 5, true, defaultConfig); const args = mockSendCommand.mock.calls[0]![0] as string[]; - 
expect(args[3]).toBe("cb:my-target"); - expect(args[4]).toBe("rl:my-target"); + expect(args[3]).toBe("cb:{my-target}"); + expect(args[4]).toBe("rl:{my-target}"); + }); +}); + +describe("evalScript", () => { + it("throws a wrapped error including the original message when EVALSHA fails with a non-NOSCRIPT Error", async () => { + const redisError = new Error("WRONGTYPE Operation against a key"); + mockSendCommand.mockRejectedValueOnce(redisError); + + const thrown = await admit( + mockRedis, + "target-1", + 10, + true, + defaultConfig, + ).catch((error: unknown) => error); + + expect(thrown).toBeInstanceOf(Error); + expect((thrown as Error).message).toContain("Redis error in script"); + expect((thrown as Error).message).toContain( + "WRONGTYPE Operation against a key", + ); + expect((thrown as Error & { cause: unknown }).cause).toBe(redisError); + }); + + it("throws a wrapped error using String() when EVALSHA rejects with a non-Error value", async () => { + mockSendCommand.mockRejectedValueOnce("connection refused"); + + const thrown = await admit( + mockRedis, + "target-1", + 10, + true, + defaultConfig, + ).catch((error: unknown) => error); + + expect(thrown).toBeInstanceOf(Error); + expect((thrown as Error).message).toContain("Redis error in script"); + expect((thrown as Error).message).toContain("connection refused"); }); }); @@ -187,20 +217,12 @@ describe("recordResult", () => { expect(mockSendCommand).toHaveBeenCalledTimes(2); }); - it("propagates non-NOSCRIPT Redis errors", async () => { - mockSendCommand.mockRejectedValueOnce(new Error("Connection refused")); - - await expect( - recordResult(mockRedis, "target-1", false, defaultConfig), - ).rejects.toThrow("Connection refused"); - }); - it("passes correct cb key for target", async () => { mockSendCommand.mockResolvedValueOnce([1, "closed"]); await recordResult(mockRedis, "my-target", true, defaultConfig); const args = mockSendCommand.mock.calls[0]![0] as string[]; - expect(args[3]).toBe("cb:my-target"); + 
expect(args[3]).toBe("cb:{my-target}"); }); }); diff --git a/lambdas/https-client-lambda/src/__tests__/handler.test.ts b/lambdas/https-client-lambda/src/__tests__/handler.test.ts index a31cc61c..f6cbdb68 100644 --- a/lambdas/https-client-lambda/src/__tests__/handler.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/handler.test.ts @@ -142,7 +142,9 @@ describe("processRecords", () => { const failures = await processRecords([makeRecord()]); expect(failures).toEqual([]); - expect(mockSendToDlq).toHaveBeenCalledWith(makeRecord().body); + expect(mockSendToDlq).toHaveBeenCalledWith(makeRecord().body, { + outcome: "permanent_failure", + }); }); it("returns failure for transient 5xx errors", async () => { @@ -538,4 +540,21 @@ describe("processRecords", () => { 3_600_000, ); }); + + it("returns no failure when handleRateLimitedRecord resolves without throwing", async () => { + mockDeliverPayload.mockResolvedValue({ + outcome: "permanent_failure", + statusCode: 429, + retryAfterHeader: "60", + }); + mockHandleRateLimitedRecord.mockResolvedValueOnce(undefined); + + const failures = await processRecords([makeRecord()]); + + expect(failures).toEqual([]); + expect(mockIsWindowExhausted).toHaveBeenCalledWith( + expect.any(Number), + 7_200_000, + ); + }); }); diff --git a/lambdas/https-client-lambda/src/__tests__/https-client.test.ts b/lambdas/https-client-lambda/src/__tests__/https-client.test.ts index e1850567..a6229c57 100644 --- a/lambdas/https-client-lambda/src/__tests__/https-client.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/https-client.test.ts @@ -36,6 +36,7 @@ type MockResponse = EventEmitter & { function mockHttpsRequest( statusCode: number, headers: Record = {}, + body = "", ) { const mockReq = new EventEmitter() as EventEmitter & { end: jest.Mock; @@ -56,7 +57,13 @@ function mockHttpsRequest( }); if (callback) { - process.nextTick(() => callback(res)); + process.nextTick(() => { + callback(res); + process.nextTick(() => { + if (body) res.emit("data", 
Buffer.from(body)); + res.emit("end"); + }); + }); } return mockReq as unknown as ReturnType; @@ -125,7 +132,7 @@ describe("deliverPayload", () => { }); it("returns permanent_failure on 4xx non-429", async () => { - mockHttpsRequest(400); + mockHttpsRequest(400, {}, JSON.stringify({ message: "Bad request" })); const result = await deliverPayload( createTarget(), @@ -134,7 +141,11 @@ describe("deliverPayload", () => { createMockAgent(), ); - expect(result).toEqual({ outcome: "permanent_failure" }); + expect(result).toEqual({ + outcome: "permanent_failure", + statusCode: 400, + responseBody: JSON.stringify({ message: "Bad request" }), + }); }); it("returns permanent_failure on TLS error CERT_HAS_EXPIRED", async () => { @@ -147,7 +158,10 @@ describe("deliverPayload", () => { createMockAgent(), ); - expect(result).toEqual({ outcome: "permanent_failure" }); + expect(result).toEqual({ + outcome: "permanent_failure", + errorCode: "CERT_HAS_EXPIRED", + }); }); it("returns permanent_failure on TLS pinning error", async () => { @@ -160,7 +174,10 @@ describe("deliverPayload", () => { createMockAgent(), ); - expect(result).toEqual({ outcome: "permanent_failure" }); + expect(result).toEqual({ + outcome: "permanent_failure", + errorCode: "ERR_CERT_PINNING_FAILED", + }); }); it("returns transient_failure on 5xx", async () => { @@ -189,6 +206,7 @@ describe("deliverPayload", () => { expect(result).toEqual({ outcome: "rate_limited", retryAfterHeader: "60", + statusCode: 429, }); }); @@ -205,6 +223,7 @@ describe("deliverPayload", () => { expect(result).toEqual({ outcome: "rate_limited", retryAfterHeader: undefined, + statusCode: 429, }); }); @@ -287,4 +306,43 @@ describe("deliverPayload", () => { expect(result).toEqual({ outcome: "transient_failure", statusCode: 0 }); }); + + it("treats undefined statusCode as 0", async () => { + const mockReq = new EventEmitter() as EventEmitter & { + end: jest.Mock; + destroy: jest.Mock; + }; + mockReq.end = jest.fn(); + mockReq.destroy = 
jest.fn(); + + jest.spyOn(https, "request").mockImplementation((...args: unknown[]) => { + const callback = args.find((a) => typeof a === "function") as + | ((res: MockResponse) => void) + | undefined; + + const res = Object.assign(new EventEmitter(), { + statusCode: undefined as unknown as number, + headers: {}, + resume: jest.fn(), + }) as MockResponse; + + if (callback) { + process.nextTick(() => { + callback(res); + process.nextTick(() => (res as EventEmitter).emit("end")); + }); + } + + return mockReq as unknown as ReturnType; + }); + + const result = await deliverPayload( + createTarget(), + '{"test":true}', + "sig-abc", + createMockAgent(), + ); + + expect(result).toEqual({ outcome: "transient_failure", statusCode: 0 }); + }); }); diff --git a/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts b/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts index b2ca7877..fae8112f 100644 --- a/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts @@ -129,6 +129,34 @@ describe("tls-agent-factory", () => { expect(mockSecretsManagerSend).not.toHaveBeenCalled(); }); + it("loads test CA for server trust when MTLS_TEST_CA_S3_KEY is set and mtls is disabled", async () => { + process.env.MTLS_TEST_CA_S3_KEY = "test-ca.pem"; + jest.resetModules(); + // @ts-expect-error -- modulePaths resolves at runtime + const mod = await import("services/delivery/tls-agent-factory"); + + const caPem = + "-----BEGIN CERTIFICATE-----\ntest-ca\n-----END CERTIFICATE-----"; + mockS3Send + .mockResolvedValueOnce({ + Body: { + transformToString: jest.fn().mockResolvedValue(COMBINED_PEM), + }, + }) + .mockResolvedValueOnce({ + Body: { transformToString: jest.fn().mockResolvedValue(caPem) }, + }); + + const agent = await mod.buildAgent( + createTarget({ delivery: { mtls: { enabled: false } } }), + ); + + expect(agent).toBeDefined(); + expect(agent.options.ca).toBe(caPem); + 
expect(agent.options.key).toBeUndefined(); + expect(agent.options.cert).toBeUndefined(); + }); + it("loads test CA when MTLS_TEST_CA_S3_KEY is set", async () => { process.env.MTLS_TEST_CA_S3_KEY = "test-ca.pem"; jest.resetModules(); diff --git a/lambdas/https-client-lambda/src/handler.ts b/lambdas/https-client-lambda/src/handler.ts index f552ea0d..48ee53cf 100644 --- a/lambdas/https-client-lambda/src/handler.ts +++ b/lambdas/https-client-lambda/src/handler.ts @@ -106,8 +106,13 @@ async function handleDeliveryResult( } if (result.outcome === OUTCOME_PERMANENT_FAILURE) { - recordDeliveryPermanentFailure(clientId, targetId); - await sendToDlq(record.body); + recordDeliveryPermanentFailure( + clientId, + targetId, + result.statusCode, + result.errorCode, + ); + await sendToDlq(record.body, result); return; } @@ -219,6 +224,10 @@ export async function processRecords( return null; } catch (error) { if (!(error instanceof VisibilityManagedError)) { + logger.error("Failed to process record", { + messageId: record.messageId, + err: error, + }); const receiveCount = Number( record.attributes.ApproximateReceiveCount, ); diff --git a/lambdas/https-client-lambda/src/services/delivery-observability.ts b/lambdas/https-client-lambda/src/services/delivery-observability.ts index 8fd4cea5..50dbb30e 100644 --- a/lambdas/https-client-lambda/src/services/delivery-observability.ts +++ b/lambdas/https-client-lambda/src/services/delivery-observability.ts @@ -31,11 +31,15 @@ export function recordDeliverySuccess( export function recordDeliveryPermanentFailure( clientId: string, targetId: string, + statusCode?: number, + errorCode?: string, ): void { emitDeliveryPermanentFailure(targetId); logger.warn("Permanent delivery failure — sending to DLQ", { clientId, targetId, + ...(statusCode !== undefined && { statusCode }), + ...(errorCode !== undefined && { errorCode }), }); } diff --git a/lambdas/https-client-lambda/src/services/delivery/https-client.ts 
b/lambdas/https-client-lambda/src/services/delivery/https-client.ts index 418d7563..dfe142f8 100644 --- a/lambdas/https-client-lambda/src/services/delivery/https-client.ts +++ b/lambdas/https-client-lambda/src/services/delivery/https-client.ts @@ -10,9 +10,15 @@ export const OUTCOME_TRANSIENT_FAILURE = "transient_failure" as const; export type DeliveryResult = | { outcome: typeof OUTCOME_SUCCESS } - | { outcome: typeof OUTCOME_PERMANENT_FAILURE } + | { + outcome: typeof OUTCOME_PERMANENT_FAILURE; + statusCode?: number; + errorCode?: string; + responseBody?: string; + } | { outcome: typeof OUTCOME_RATE_LIMITED; + statusCode: 429; retryAfterHeader: string | undefined; } | { outcome: typeof OUTCOME_TRANSIENT_FAILURE; statusCode: number }; @@ -41,29 +47,40 @@ export function deliverPayload( }, }, (res) => { - res.resume(); - const statusCode = res.statusCode ?? 0; if (statusCode >= 200 && statusCode < 300) { + res.resume(); resolve({ outcome: OUTCOME_SUCCESS }); return; } if (statusCode === 429) { + res.resume(); const retryAfterHeader = res.headers["retry-after"]; resolve({ outcome: OUTCOME_RATE_LIMITED, + statusCode, retryAfterHeader, }); return; } if (statusCode >= 400 && statusCode < 500) { - resolve({ outcome: OUTCOME_PERMANENT_FAILURE }); + const chunks: Buffer[] = []; + res.on("data", (chunk: Buffer) => chunks.push(chunk)); + res.on("end", () => { + const responseBody = Buffer.concat(chunks).toString("utf8"); + resolve({ + outcome: OUTCOME_PERMANENT_FAILURE, + statusCode, + responseBody, + }); + }); return; } + res.resume(); resolve({ outcome: OUTCOME_TRANSIENT_FAILURE, statusCode }); }, ); @@ -74,7 +91,7 @@ export function deliverPayload( req.on("error", (error: NodeJS.ErrnoException) => { if (error.code && PERMANENT_TLS_ERROR_CODES.has(error.code)) { - resolve({ outcome: OUTCOME_PERMANENT_FAILURE }); + resolve({ outcome: OUTCOME_PERMANENT_FAILURE, errorCode: error.code }); return; } diff --git 
a/lambdas/https-client-lambda/src/services/delivery/tls-agent-factory.ts b/lambdas/https-client-lambda/src/services/delivery/tls-agent-factory.ts index e6c0fcfa..fb1ea136 100644 --- a/lambdas/https-client-lambda/src/services/delivery/tls-agent-factory.ts +++ b/lambdas/https-client-lambda/src/services/delivery/tls-agent-factory.ts @@ -150,14 +150,20 @@ export async function buildAgent(target: CallbackTarget): Promise { ); } - if (target.delivery?.mtls?.enabled) { + // Always load the CA in test environments (MTLS_TEST_CA_S3_KEY set) so that + // targets with mtls.enabled: false can still verify the server's cert chain. + // In production the CA comes from SecretsManager only when mTLS is in use. + if (target.delivery?.mtls?.enabled || MTLS_TEST_CA_S3_KEY) { const material = await getMaterial(); - agentOptions.key = material.key; - agentOptions.cert = material.cert; if (material.ca) { agentOptions.ca = material.ca; } + + if (target.delivery?.mtls?.enabled) { + agentOptions.key = material.key; + agentOptions.cert = material.cert; + } } if (certPinning?.enabled) { diff --git a/lambdas/https-client-lambda/src/services/dlq-sender.ts b/lambdas/https-client-lambda/src/services/dlq-sender.ts index af61a666..56b92405 100644 --- a/lambdas/https-client-lambda/src/services/dlq-sender.ts +++ b/lambdas/https-client-lambda/src/services/dlq-sender.ts @@ -1,17 +1,70 @@ -import { SQSClient, SendMessageCommand } from "@aws-sdk/client-sqs"; +import { + type MessageAttributeValue, + SQSClient, + SendMessageCommand, +} from "@aws-sdk/client-sqs"; const sqsClient = new SQSClient({}); -export async function sendToDlq(messageBody: string): Promise { +export type DlqErrorInfo = { + statusCode?: number; + errorCode?: string; + responseBody?: string; +}; + +function buildDlqAttributes( + errorInfo: DlqErrorInfo, +): Record { + const attrs: Record = {}; + + if (errorInfo.errorCode) { + attrs.ERROR_CODE = { + DataType: "String", + StringValue: errorInfo.errorCode, + }; + } else if 
(errorInfo.statusCode !== undefined) { + attrs.ERROR_CODE = { + DataType: "String", + StringValue: "HTTP_CLIENT_ERROR", + }; + } + + if (errorInfo.responseBody) { + let errorMessage = errorInfo.responseBody; + try { + const parsed = JSON.parse(errorInfo.responseBody) as { + message?: string; + }; + if (parsed.message) { + errorMessage = parsed.message; + } + } catch { + // use raw body if not valid JSON + } + attrs.ERROR_MESSAGE = { DataType: "String", StringValue: errorMessage }; + } + + return attrs; +} + +export async function sendToDlq( + messageBody: string, + errorInfo?: DlqErrorInfo, +): Promise { const { DLQ_URL } = process.env; if (!DLQ_URL) { throw new Error("DLQ_URL is required"); } + const messageAttributes = errorInfo + ? buildDlqAttributes(errorInfo) + : undefined; + await sqsClient.send( new SendMessageCommand({ QueueUrl: DLQ_URL, MessageBody: messageBody, + ...(messageAttributes && { MessageAttributes: messageAttributes }), }), ); } diff --git a/lambdas/https-client-lambda/src/services/endpoint-gate.ts b/lambdas/https-client-lambda/src/services/endpoint-gate.ts index 81a98290..c2d85439 100644 --- a/lambdas/https-client-lambda/src/services/endpoint-gate.ts +++ b/lambdas/https-client-lambda/src/services/endpoint-gate.ts @@ -60,7 +60,12 @@ async function evalScript( const isNoScript = error instanceof Error && error.message.includes("NOSCRIPT"); if (!isNoScript) { - throw error; + throw new Error( + `Redis error in script ${script}: ${ + error instanceof Error ? error.message : String(error) + }`, + { cause: error }, + ); } return client.sendCommand(["EVAL", script, keyCount, ...keys, ...args]); } @@ -73,8 +78,8 @@ export async function admit( cbEnabled: boolean, config: EndpointGateConfig, ): Promise { - const cbKey = `cb:${targetId}`; - const rlKey = `rl:${targetId}`; + const cbKey = `cb:{${targetId}}`; + const rlKey = `rl:{${targetId}}`; const now = Date.now().toString(); const probeIntervalMs = cbEnabled ? 
config.cbProbeIntervalMs.toString() : "0"; @@ -125,7 +130,7 @@ export async function recordResult( success: boolean, config: EndpointGateConfig, ): Promise { - const cbKey = `cb:${targetId}`; + const cbKey = `cb:{${targetId}}`; const now = Date.now().toString(); const args = [ diff --git a/lambdas/https-client-lambda/src/services/redis-client.ts b/lambdas/https-client-lambda/src/services/redis-client.ts index bfe9e29c..7d8785c8 100644 --- a/lambdas/https-client-lambda/src/services/redis-client.ts +++ b/lambdas/https-client-lambda/src/services/redis-client.ts @@ -34,20 +34,28 @@ async function generateElastiCacheIamToken(): Promise { { protocol: "https:", method: "GET", - hostname: endpoint, + hostname: cacheName, path: "/", query: { Action: "connect", User: username }, - headers: { host: endpoint }, + headers: { host: cacheName }, }, { expiresIn: TOKEN_EXPIRY_SECONDS }, ); tokenExpiry = Date.now() + TOKEN_EXPIRY_SECONDS * 1000; + logger.info("ElastiCache IAM token generated", { + cacheName, + username, + region, + signingAlgorithm: signed.query?.["X-Amz-Algorithm"], + tokenExpiresAt: new Date(tokenExpiry).toISOString(), + }); + const qs = new URLSearchParams( signed.query as Record, ).toString(); - return `https://${signed.hostname}${signed.path}?${qs}`; + return `${cacheName}/?${qs}`; } export async function getRedisClient(): Promise { @@ -55,6 +63,7 @@ export async function getRedisClient(): Promise { tokenExpiry > Date.now() + TOKEN_REFRESH_BUFFER_SECONDS * 1000; if (redisClient?.isOpen && isTokenValid) { + logger.info("Reusing existing Redis client"); return redisClient; } @@ -69,11 +78,14 @@ export async function getRedisClient(): Promise { } if (redisClient?.isOpen) { + logger.info("Disconnecting Redis client for token refresh"); await redisClient.disconnect(); } const token = await generateElastiCacheIamToken(); + logger.info("Connecting to ElastiCache", { endpoint, username }); + redisClient = createClient({ url: `rediss://${endpoint}:6379`, username, diff 
--git a/scripts/tests/integration-debug.sh b/scripts/tests/integration-debug.sh index 08dc0e90..15329969 100755 --- a/scripts/tests/integration-debug.sh +++ b/scripts/tests/integration-debug.sh @@ -12,16 +12,20 @@ set -euo pipefail # Actions: # queue-status Show SQS queue message counts # queue-peek Peek one message from each SQS queue -# tail-transform Tail client-transform-filter lambda logs -# tail-webhook Tail mock-webhook lambda logs -# tail-pipe Tail EventBridge pipe log group -# pipe-state Show EventBridge pipe state and recent metrics +# tail-transform Tail client-transform-filter lambda logs +# tail-https-client Tail https-client lambda logs (requires CLIENT_ID) +# tail-webhook Tail mock-webhook lambda logs +# tail-pipe Tail EventBridge pipe log group +# pipe-state Show EventBridge pipe state and recent metrics # # Required: # ENVIRONMENT # AWS_PROFILE # ACTION # +# Required for queue-status, queue-peek: +# CLIENT_ID Client ID (e.g. mock-client-1) +# # Optional: # LOG_FILTER CloudWatch Logs filter pattern / text # AWS_REGION (default: eu-west-2) @@ -45,7 +49,7 @@ fi REGION="${AWS_REGION:-eu-west-2}" LOG_FILTER="${LOG_FILTER:-}" -SUBSCRIPTION_FIXTURE_PATH="${SUBSCRIPTION_FIXTURE_PATH:-tests/integration/fixtures/subscriptions/mock-client-1.json}" +CLIENT_ID="${CLIENT_ID:-}" if ! aws sts get-caller-identity --profile "$AWS_PROFILE" >/dev/null 2>&1; then echo "No active AWS SSO session for profile '$AWS_PROFILE'. Running aws sso login..." @@ -72,22 +76,12 @@ queue_url() { return 0 } -target_dlq_queue_name() { - local target_id - - if [[ ! 
-f "$SUBSCRIPTION_FIXTURE_PATH" ]]; then - echo "Error: subscription fixture not found: $SUBSCRIPTION_FIXTURE_PATH" >&2 - exit 1 - fi - - target_id="$(jq -r '.targets[0].targetId // empty' "$SUBSCRIPTION_FIXTURE_PATH")" - if [[ -z "$target_id" ]]; then - echo "Error: unable to read targets[0].targetId from $SUBSCRIPTION_FIXTURE_PATH" >&2 +require_client_id() { + if [ -z "$CLIENT_ID" ]; then + echo "Error: CLIENT_ID must be set for this action." >&2 + echo "Example: CLIENT_ID=mock-client-1 ENVIRONMENT= AWS_PROFILE= make test-integration-debug ACTION=queue-status" >&2 exit 1 fi - - echo "${PREFIX}-${target_id}-dlq-queue" - return 0 } show_queue_counts() { @@ -106,10 +100,11 @@ show_queue_counts() { } action_queue_status() { - show_queue_counts "Mock Target DLQ - Queue Message Counts" "$(target_dlq_queue_name)" - show_queue_counts "Inbound Event Queue - Queue Message Counts" "${PREFIX}-inbound-event-queue" - show_queue_counts "Inbound Event DLQ - Queue Message Counts" "${PREFIX}-inbound-event-dlq" - return 0 + require_client_id + show_queue_counts "Client Delivery Queue - Message Counts" "${PREFIX}-${CLIENT_ID}-delivery-queue" + show_queue_counts "Client Delivery DLQ - Message Counts" "${PREFIX}-${CLIENT_ID}-delivery-dlq-queue" + show_queue_counts "Inbound Event Queue - Message Counts" "${PREFIX}-inbound-event-queue" + show_queue_counts "Inbound Event DLQ - Message Counts" "${PREFIX}-inbound-event-dlq" } peek_queue_message() { @@ -135,19 +130,19 @@ peek_queue_message() { } action_queue_peek() { - peek_queue_message "Mock Target DLQ - Message Peek" "$(target_dlq_queue_name)" + require_client_id + peek_queue_message "Client Delivery Queue - Message Peek" "${PREFIX}-${CLIENT_ID}-delivery-queue" + peek_queue_message "Client Delivery DLQ - Message Peek" "${PREFIX}-${CLIENT_ID}-delivery-dlq-queue" peek_queue_message "Inbound Event Queue - Message Peek" "${PREFIX}-inbound-event-queue" peek_queue_message "Inbound Event DLQ - Message Peek" "${PREFIX}-inbound-event-dlq" return 
0 } log_filter_args() { - local -a args=() - local escaped_log_filter if [[ -n "$LOG_FILTER" ]]; then - escaped_log_filter="${LOG_FILTER//\"/\\\"}" + local escaped_log_filter="${LOG_FILTER//\"/\\\"}" # CloudWatch filter patterns treat quoted strings as exact phrases. - args+=(--filter-pattern "\"$escaped_log_filter\"") + printf '%s\n' --filter-pattern "\"$escaped_log_filter\"" fi printf '%s\n' "${args[@]}" @@ -170,6 +165,22 @@ action_tail_transform() { return 0 } +action_tail_https_client() { + require_client_id + local -a filter_args=() + mapfile -t filter_args < <(log_filter_args) + + print_section "HTTPS Client Lambda Logs" + aws logs tail \ + "/aws/lambda/${PREFIX}-https-client-${CLIENT_ID}" \ + --region "$REGION" \ + --profile "$AWS_PROFILE" \ + --since 30m \ + --follow \ + --format short \ + "${filter_args[@]}" +} + action_tail_webhook() { local -a filter_args=() mapfile -t filter_args < <(log_filter_args) @@ -280,6 +291,9 @@ case "$ACTION" in tail-transform) action_tail_transform ;; + tail-https-client) + action_tail_https_client + ;; tail-webhook) action_tail_webhook ;; @@ -291,7 +305,7 @@ case "$ACTION" in ;; *) echo "Unknown action: $ACTION" >&2 - echo "Actions: queue-status, queue-peek, tail-transform, tail-webhook, tail-pipe, pipe-state" >&2 + echo "Actions: queue-status, queue-peek, tail-transform, tail-https-client, tail-webhook, tail-pipe, pipe-state" >&2 exit 1 ;; esac diff --git a/scripts/tests/test.mk b/scripts/tests/test.mk index a94a5af0..2bb70740 100644 --- a/scripts/tests/test.mk +++ b/scripts/tests/test.mk @@ -38,7 +38,7 @@ test-integration-local: # Run integration tests locally against a remoptely depl test-integration-debug: # Debug a live environment - inspect queues, tail logs, check pipe state (requires ENVIRONMENT, AWS_PROFILE, ACTION) @Testing make _test name="integration-debug" ACTION="$(or $(ACTION),$(word 2,$(MAKECMDGOALS)))" -queue-status queue-peek tail-transform tail-webhook tail-pipe pipe-state: +queue-status queue-peek 
tail-transform tail-https-client tail-webhook tail-pipe pipe-state: @: test-load: # Run all your load tests @Testing diff --git a/tests/integration/README.md b/tests/integration/README.md index a58531b8..0a76bf74 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -50,30 +50,33 @@ All are run via `make test-integration-debug ACTION=`. - [`queue-status`](#queue-status) – SQS queue message counts - [`queue-peek`](#queue-peek) – Peek at one message from each SQS queue - [`tail-transform`](#tail-transform) – Tail the transform/filter Lambda logs +- [`tail-https-client`](#tail-https-client) – Tail the https-client Lambda logs - [`tail-webhook`](#tail-webhook) – Tail the mock-webhook Lambda logs - [`tail-pipe`](#tail-pipe) – Tail the EventBridge pipe logs - [`pipe-state`](#pipe-state) – Show EventBridge pipe state and recent metrics -All log-tailing actions (`tail-transform`, `tail-webhook`, `tail-pipe`) accept an optional `LOG_FILTER` to narrow output to a specific message ID or pattern. +Some actions require `CLIENT_ID` (e.g. `mock-client-single-target`) — see individual actions below. + +All log-tailing actions (`tail-transform`, `tail-https-client`, `tail-webhook`, `tail-pipe`) accept an optional `LOG_FILTER` to narrow output to a specific message ID or pattern. --- ### `queue-status` -Shows approximate message counts for the inbound event queue, inbound event DLQ, and mock target DLQ. +Shows approximate message counts for the inbound event queue, inbound event DLQ, client delivery queue, and client delivery DLQ. Requires `CLIENT_ID`. ```sh -ENVIRONMENT= AWS_PROFILE= make test-integration-debug ACTION=queue-status +CLIENT_ID= ENVIRONMENT= AWS_PROFILE= make test-integration-debug ACTION=queue-status ``` --- ### `queue-peek` -Reads one message (without deleting it) from each of the same three queues, printing body, attributes, and message attributes. 
+Reads one message (without deleting it) from each of the same four queues, printing body, attributes, and message attributes. Requires `CLIENT_ID`. ```sh -ENVIRONMENT= AWS_PROFILE= make test-integration-debug ACTION=queue-peek +CLIENT_ID= ENVIRONMENT= AWS_PROFILE= make test-integration-debug ACTION=queue-peek ``` --- @@ -94,6 +97,22 @@ ENVIRONMENT= AWS_PROFILE= LOG_FILTER=SOME-MESSAGE-ID make test-int --- +### `tail-https-client` + +Tails CloudWatch logs for the `https-client` Lambda for the given client, following from the last 30 minutes. Requires `CLIENT_ID`. + +```sh +CLIENT_ID= ENVIRONMENT= AWS_PROFILE= make test-integration-debug ACTION=tail-https-client +``` + +Filter to a specific message ID: + +```sh +CLIENT_ID= ENVIRONMENT= AWS_PROFILE= LOG_FILTER=SOME-MESSAGE-ID make test-integration-debug ACTION=tail-https-client +``` + +--- + ### `tail-webhook` Tails CloudWatch logs for the `mock-webhook` Lambda, following from the last 30 minutes. diff --git a/tests/integration/dlq-alarms.test.ts b/tests/integration/dlq-alarms.test.ts index 1cf3a578..c4f69fa8 100644 --- a/tests/integration/dlq-alarms.test.ts +++ b/tests/integration/dlq-alarms.test.ts @@ -5,14 +5,18 @@ import { } from "@aws-sdk/client-cloudwatch"; import type { DeploymentDetails } from "@nhs-notify-client-callbacks/test-support/helpers"; import { getDeploymentDetails } from "@nhs-notify-client-callbacks/test-support/helpers"; -import { getAllSubscriptionTargetIds } from "./helpers/mock-client-config"; +import { + CLIENT_FIXTURES, + type ClientFixtureKey, + getClientConfig, +} from "./helpers/mock-client-config"; import { buildMockClientDlqQueueUrl } from "./helpers/sqs"; function buildDlqDepthAlarmName( { component, environment, project }: DeploymentDetails, - targetId: string, + clientId: string, ): string { - return `${project}-${environment}-${component}-${targetId}-dlq-depth`; + return `${project}-${environment}-${component}-${clientId}-dlq-depth`; } function getQueueNameFromUrl(queueUrl: string): 
string { @@ -27,7 +31,7 @@ function getQueueNameFromUrl(queueUrl: string): string { describe("DLQ alarms", () => { let cloudWatchClient: CloudWatchClient; let deploymentDetails: DeploymentDetails; - let targetIds: string[]; + let clientIds: string[]; beforeAll(() => { deploymentDetails = getDeploymentDetails(); @@ -35,22 +39,25 @@ describe("DLQ alarms", () => { region: deploymentDetails.region, }); - targetIds = getAllSubscriptionTargetIds(); + clientIds = (Object.keys(CLIENT_FIXTURES) as ClientFixtureKey[]).map( + (key) => getClientConfig(key).clientId, + ); }); afterAll(() => { cloudWatchClient.destroy(); }); - it("should create a DLQ depth alarm for every target DLQ", async () => { - expect(targetIds.length).toBeGreaterThan(0); + it("should create a DLQ depth alarm for every client DLQ", async () => { + expect(clientIds.length).toBeGreaterThan(0); - for (const targetId of targetIds) { - const alarmName = buildDlqDepthAlarmName(deploymentDetails, targetId); - const targetDlqQueueUrl = buildMockClientDlqQueueUrl(deploymentDetails, [ - { targetId }, - ]); - const targetDlqQueueName = getQueueNameFromUrl(targetDlqQueueUrl); + for (const clientId of clientIds) { + const alarmName = buildDlqDepthAlarmName(deploymentDetails, clientId); + const clientDlqQueueUrl = buildMockClientDlqQueueUrl( + deploymentDetails, + clientId, + ); + const clientDlqQueueName = getQueueNameFromUrl(clientDlqQueueUrl); const response = await cloudWatchClient.send( new DescribeAlarmsCommand({ AlarmNames: [alarmName], @@ -67,7 +74,7 @@ describe("DLQ alarms", () => { expect.arrayContaining([ expect.objectContaining({ Name: "QueueName", - Value: targetDlqQueueName, + Value: clientDlqQueueName, }), ]), ); diff --git a/tests/integration/dlq-redrive.test.ts b/tests/integration/dlq-redrive.test.ts index e88e4920..639eadc4 100644 --- a/tests/integration/dlq-redrive.test.ts +++ b/tests/integration/dlq-redrive.test.ts @@ -19,8 +19,10 @@ import { sendSqsEvent, } from "./helpers/sqs"; import { + 
CLIENT_FIXTURES, + type ClientFixtureKey, buildMockWebhookTargetPath, - getAllSubscriptionTargetIds, + getClientConfig, getMockItClientConfig, } from "./helpers/mock-client-config"; import { awaitSignedCallbacksFromWebhookLogGroup } from "./helpers/cloudwatch"; @@ -37,20 +39,20 @@ describe("DLQ Redrive", () => { beforeAll(async () => { const deploymentDetails = getDeploymentDetails(); - const mockClient1 = getMockItClientConfig(); - - const allSubscriptionTargetIds = getAllSubscriptionTargetIds(); + const { clientId } = getMockItClientConfig(); sqsClient = createSqsClient(deploymentDetails); cloudWatchClient = createCloudWatchLogsClient(deploymentDetails); inboundQueueUrl = buildInboundEventQueueUrl(deploymentDetails); - dlqQueueUrl = buildMockClientDlqQueueUrl( - deploymentDetails, - mockClient1.targets, - ); - allTargetDlqQueueUrls = allSubscriptionTargetIds.map((targetId) => - buildMockClientDlqQueueUrl(deploymentDetails, [{ targetId }]), + dlqQueueUrl = buildMockClientDlqQueueUrl(deploymentDetails, clientId); + allTargetDlqQueueUrls = ( + Object.keys(CLIENT_FIXTURES) as ClientFixtureKey[] + ).map((key) => + buildMockClientDlqQueueUrl( + deploymentDetails, + getClientConfig(key).clientId, + ), ); webhookLogGroupName = buildLambdaLogGroupName( deploymentDetails, @@ -67,7 +69,7 @@ describe("DLQ Redrive", () => { }); describe("Infrastructure validation", () => { - it("should confirm a target DLQ is accessible for all configured subscription targets", async () => { + it("should confirm a DLQ is accessible for all configured clients", async () => { const responses = await Promise.all( allTargetDlqQueueUrls.map((queueUrl) => sqsClient.send( diff --git a/tests/integration/helpers/mock-client-config.ts b/tests/integration/helpers/mock-client-config.ts index a004b4bc..950e699d 100644 --- a/tests/integration/helpers/mock-client-config.ts +++ b/tests/integration/helpers/mock-client-config.ts @@ -29,14 +29,6 @@ export const CLIENT_FIXTURES = { export type ClientFixtureKey = 
keyof typeof CLIENT_FIXTURES; -const ALL_CLIENT_FIXTURE_KEYS = Object.keys( - CLIENT_FIXTURES, -) as ClientFixtureKey[]; - -function dedupe(values: string[]): string[] { - return [...new Set(values)]; -} - export function getClientConfig(key: ClientFixtureKey): MockItClientConfig { // eslint-disable-next-line security/detect-object-injection -- key is constrained to ClientFixtureKey, a keyof the hardcoded as-const CLIENT_FIXTURES object const { apiKeyVar, applicationIdVar, fixture } = CLIENT_FIXTURES[key]; @@ -82,18 +74,3 @@ export function buildMockWebhookTargetPaths( ): string[] { return buildWebhookTargetPaths(key); } - -export function getSubscriptionTargetIds( - key: ClientFixtureKey = "client1", -): string[] { - const config = getClientConfig(key); - return dedupe( - config.subscriptions.flatMap((subscription) => subscription.targetIds), - ); -} - -export function getAllSubscriptionTargetIds( - keys: ClientFixtureKey[] = ALL_CLIENT_FIXTURE_KEYS, -): string[] { - return dedupe(keys.flatMap((key) => getSubscriptionTargetIds(key))); -} diff --git a/tests/integration/helpers/sqs.ts b/tests/integration/helpers/sqs.ts index 857fd3a7..747f746b 100644 --- a/tests/integration/helpers/sqs.ts +++ b/tests/integration/helpers/sqs.ts @@ -46,13 +46,16 @@ function buildQueueUrl( export function buildMockClientDlqQueueUrl( deploymentDetails: DeploymentDetails, - targets: { targetId: string }[], + clientId: string, ): string { - const [firstTarget] = targets; - if (!firstTarget) { - throw new Error("At least one target is required to build DLQ URL"); - } - return buildQueueUrl(deploymentDetails, `${firstTarget.targetId}-dlq`); + return buildQueueUrl(deploymentDetails, `${clientId}-delivery-dlq`); +} + +export function buildMockClientDeliveryQueueUrl( + deploymentDetails: DeploymentDetails, + clientId: string, +): string { + return buildQueueUrl(deploymentDetails, `${clientId}-delivery`); } export async function sendSqsEvent( diff --git 
a/tests/integration/inbound-sqs-to-webhook.test.ts b/tests/integration/inbound-sqs-to-webhook.test.ts index 4305f05e..d75ad402 100644 --- a/tests/integration/inbound-sqs-to-webhook.test.ts +++ b/tests/integration/inbound-sqs-to-webhook.test.ts @@ -28,6 +28,7 @@ import { assertCallbackHeaders } from "./helpers/signature"; import { awaitQueueMessage, awaitQueueMessageByMessageId, + buildMockClientDeliveryQueueUrl, buildMockClientDlqQueueUrl, ensureInboundQueueIsEmpty, purgeQueues, @@ -49,6 +50,7 @@ describe("SQS to Webhook Integration", () => { let cloudWatchClient: CloudWatchLogsClient; let callbackEventQueueUrl: string; let clientDlqQueueUrl: string; + let clientDeliveryQueueUrl: string; let inboundEventDlqQueueUrl: string; let webhookLogGroupName: string; let webhookTargetPath: string; @@ -56,12 +58,16 @@ describe("SQS to Webhook Integration", () => { beforeAll(async () => { const deploymentDetails = getDeploymentDetails(); - const { targets } = getMockItClientConfig(); + const { clientId } = getMockItClientConfig(); sqsClient = createSqsClient(deploymentDetails); cloudWatchClient = createCloudWatchLogsClient(deploymentDetails); callbackEventQueueUrl = buildInboundEventQueueUrl(deploymentDetails); - clientDlqQueueUrl = buildMockClientDlqQueueUrl(deploymentDetails, targets); + clientDlqQueueUrl = buildMockClientDlqQueueUrl(deploymentDetails, clientId); + clientDeliveryQueueUrl = buildMockClientDeliveryQueueUrl( + deploymentDetails, + clientId, + ); inboundEventDlqQueueUrl = buildInboundEventDlqQueueUrl(deploymentDetails); webhookLogGroupName = buildLambdaLogGroupName( deploymentDetails, @@ -72,6 +78,7 @@ describe("SQS to Webhook Integration", () => { await purgeQueues(sqsClient, [ inboundEventDlqQueueUrl, clientDlqQueueUrl, + clientDeliveryQueueUrl, callbackEventQueueUrl, ]); }); @@ -80,6 +87,7 @@ describe("SQS to Webhook Integration", () => { await purgeQueues(sqsClient, [ inboundEventDlqQueueUrl, clientDlqQueueUrl, + clientDeliveryQueueUrl, callbackEventQueueUrl, 
]); @@ -195,7 +203,7 @@ describe("SQS to Webhook Integration", () => { }); describe("Client Webhook DLQ", () => { - it("should route a non-retriable (4xx) webhook response to the per-target DLQ", async () => { + it("should route a non-retriable (4xx) webhook response to the per-client DLQ", async () => { const event: StatusPublishEvent = createMessageStatusPublishEvent({ data: { @@ -209,7 +217,7 @@ describe("SQS to Webhook Integration", () => { expect(dlqMessage.Body).toBeDefined(); expect(dlqMessage.MessageAttributes?.ERROR_CODE?.StringValue).toBe( - "INVALID_PARAMETER", + "HTTP_CLIENT_ERROR", ); expect( dlqMessage.MessageAttributes?.ERROR_MESSAGE?.StringValue, diff --git a/tests/integration/metrics.test.ts b/tests/integration/metrics.test.ts index 2f314f85..f40eba69 100644 --- a/tests/integration/metrics.test.ts +++ b/tests/integration/metrics.test.ts @@ -40,12 +40,12 @@ describe("Metrics", () => { beforeAll(async () => { const deploymentDetails = getDeploymentDetails(); - const { targets } = getMockItClientConfig(); + const { clientId } = getMockItClientConfig(); sqsClient = createSqsClient(deploymentDetails); cloudWatchClient = createCloudWatchLogsClient(deploymentDetails); callbackEventQueueUrl = buildInboundEventQueueUrl(deploymentDetails); - clientDlqQueueUrl = buildMockClientDlqQueueUrl(deploymentDetails, targets); + clientDlqQueueUrl = buildMockClientDlqQueueUrl(deploymentDetails, clientId); inboundEventDlqQueueUrl = buildInboundEventDlqQueueUrl(deploymentDetails); logGroupName = buildLambdaLogGroupName( deploymentDetails, From 91d993fe869e357f6ad53d1f7e93399169951a8f Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Tue, 21 Apr 2026 15:22:47 +0100 Subject: [PATCH 25/65] Set the SPKI hash for test client config --- .../terraform/components/callbacks/README.md | 1 + .../terraform/components/callbacks/locals.tf | 11 +++++++++++ .../callbacks/s3_bucket_mtls_test_certs.tf | 19 +++++++++++++++++++ .../components/callbacks/versions.tf | 4 ++++ 4 files changed, 35 
insertions(+) diff --git a/infrastructure/terraform/components/callbacks/README.md b/infrastructure/terraform/components/callbacks/README.md index 8ec08700..4f666c27 100644 --- a/infrastructure/terraform/components/callbacks/README.md +++ b/infrastructure/terraform/components/callbacks/README.md @@ -8,6 +8,7 @@ |------|---------| | [terraform](#requirement\_terraform) | >= 1.10.1 | | [aws](#requirement\_aws) | 6.13 | +| [external](#requirement\_external) | ~> 2.0 | | [random](#requirement\_random) | ~> 3.0 | | [tls](#requirement\_tls) | ~> 4.0 | ## Inputs diff --git a/infrastructure/terraform/components/callbacks/locals.tf b/infrastructure/terraform/components/callbacks/locals.tf index d80b5b7e..68129a5b 100644 --- a/infrastructure/terraform/components/callbacks/locals.tf +++ b/infrastructure/terraform/components/callbacks/locals.tf @@ -13,6 +13,10 @@ locals { } ]...) + # SPKI hash of the mock webhook server certificate for cert-pinning enrichment. + # Computed via external data source because Terraform cannot SHA-256 hash raw binary (DER) data natively. + mock_server_spki_hash = var.deploy_mock_clients ? data.external.mock_server_spki_hash[0].result.hash : "" + # When deploying mock clients, replace sentinel placeholder values with the mock webhook URL and API key. # Only used for S3 object content — must not be used as a for_each source (contains apply-time values). enriched_mock_config_clients = var.deploy_mock_clients ? { @@ -23,6 +27,13 @@ locals { merge(target, { invocationEndpoint = "https://${aws_lb.mock_webhook_mtls[0].dns_name}/${target.targetId}" apiKey = merge(target.apiKey, { headerValue = random_password.mock_webhook_api_key[0].result }) + delivery = merge(try(target.delivery, {}), { + mtls = merge(try(target.delivery.mtls, {}), { + certPinning = merge(try(target.delivery.mtls.certPinning, {}), try(target.delivery.mtls.certPinning.enabled, false) ? 
{ + spkiHash = local.mock_server_spki_hash + } : {}) + }) + }) }) ] }) diff --git a/infrastructure/terraform/components/callbacks/s3_bucket_mtls_test_certs.tf b/infrastructure/terraform/components/callbacks/s3_bucket_mtls_test_certs.tf index e04c8027..e1bd377e 100644 --- a/infrastructure/terraform/components/callbacks/s3_bucket_mtls_test_certs.tf +++ b/infrastructure/terraform/components/callbacks/s3_bucket_mtls_test_certs.tf @@ -172,3 +172,22 @@ resource "aws_s3_object" "mtls_test_ca" { server_side_encryption = "aws:kms" content_type = "application/x-pem-file" } + +# Compute the base64-encoded SHA-256 hash of the mock server's SPKI (Subject Public Key Info) DER. +# Used by cert-pinning clients to verify the server certificate during mTLS handshake. +data "external" "mock_server_spki_hash" { + count = var.deploy_mock_clients ? 1 : 0 + program = ["bash", "-c", <<-EOT + HASH=$(jq -r '.pem' \ + | openssl pkey -pubin -outform DER 2>/dev/null \ + | openssl dgst -sha256 -binary \ + | base64 \ + | tr -d '\n') + printf '{"hash":"%s"}' "$HASH" + EOT + ] + + query = { + pem = tls_private_key.mock_server[0].public_key_pem + } +} diff --git a/infrastructure/terraform/components/callbacks/versions.tf b/infrastructure/terraform/components/callbacks/versions.tf index 88481e64..d91998a2 100644 --- a/infrastructure/terraform/components/callbacks/versions.tf +++ b/infrastructure/terraform/components/callbacks/versions.tf @@ -4,6 +4,10 @@ terraform { source = "hashicorp/aws" version = "6.13" } + external = { + source = "hashicorp/external" + version = "~> 2.0" + } random = { source = "hashicorp/random" version = "~> 3.0" From d4d304cefbfd51fb4fe99705aef917c8fefd7a2e Mon Sep 17 00:00:00 2001 From: rhyscoxnhs Date: Thu, 23 Apr 2026 09:08:27 +0100 Subject: [PATCH 26/65] CCM-16002 - Revised performance test implementation (#123) * CCM-16002 - Revised performance test implementation --- .../terraform/components/callbacks/README.md | 2 + .../callbacks/module_perf_runner_lambda.tf | 102 
++++++ .../terraform/components/callbacks/pre.sh | 26 +- .../components/callbacks/variables.tf | 6 + knip.ts | 6 +- lambdas/perf-runner-lambda/jest.config.ts | 13 + .../perf-runner-lambda}/package.json | 16 +- .../src/__tests__/cloudwatch.test.ts | 287 ++++++++++++++++ .../src/__tests__/event-factories.test.ts | 68 ++++ .../src/__tests__/index.test.ts | 122 +++++++ .../src/__tests__/runner.test.ts | 323 ++++++++++++++++++ .../src/__tests__/sqs.test.ts | 141 ++++++++ lambdas/perf-runner-lambda/src/cloudwatch.ts | 110 ++++++ .../src}/event-factories.ts | 44 +-- lambdas/perf-runner-lambda/src/index.ts | 55 +++ lambdas/perf-runner-lambda/src/runner.ts | 140 ++++++++ lambdas/perf-runner-lambda/src/scenario.ts | 82 +++++ lambdas/perf-runner-lambda/src/sqs.ts | 78 +++++ lambdas/perf-runner-lambda/src/types.ts | 79 +++++ .../perf-runner-lambda}/tsconfig.json | 9 +- package.json | 9 - pnpm-lock.yaml | 80 +++-- pnpm-workspace.yaml | 7 + tests/performance/README.md | 32 -- .../fixtures/subscriptions/perf-client-1.json | 44 +++ .../fixtures/subscriptions/perf-client-2.json | 54 +++ .../fixtures/subscriptions/perf-client-3.json | 64 ++++ .../fixtures/subscriptions/perf-client-4.json | 57 ++++ tests/performance/helpers/cloudwatch.ts | 156 --------- tests/performance/helpers/deployment.ts | 10 - tests/performance/helpers/index.ts | 4 - tests/performance/helpers/sqs.ts | 72 ---- tests/performance/jest.config.ts | 14 - tests/performance/lambda-throughput.test.ts | 76 ----- .../src/repository/client-subscriptions.ts | 2 +- 35 files changed, 1932 insertions(+), 458 deletions(-) create mode 100644 infrastructure/terraform/components/callbacks/module_perf_runner_lambda.tf create mode 100644 lambdas/perf-runner-lambda/jest.config.ts rename {tests/performance => lambdas/perf-runner-lambda}/package.json (60%) create mode 100644 lambdas/perf-runner-lambda/src/__tests__/cloudwatch.test.ts create mode 100644 lambdas/perf-runner-lambda/src/__tests__/event-factories.test.ts create mode 100644 
lambdas/perf-runner-lambda/src/__tests__/index.test.ts create mode 100644 lambdas/perf-runner-lambda/src/__tests__/runner.test.ts create mode 100644 lambdas/perf-runner-lambda/src/__tests__/sqs.test.ts create mode 100644 lambdas/perf-runner-lambda/src/cloudwatch.ts rename {tests/performance/helpers => lambdas/perf-runner-lambda/src}/event-factories.ts (70%) create mode 100644 lambdas/perf-runner-lambda/src/index.ts create mode 100644 lambdas/perf-runner-lambda/src/runner.ts create mode 100644 lambdas/perf-runner-lambda/src/scenario.ts create mode 100644 lambdas/perf-runner-lambda/src/sqs.ts create mode 100644 lambdas/perf-runner-lambda/src/types.ts rename {tests/performance => lambdas/perf-runner-lambda}/tsconfig.json (60%) delete mode 100644 tests/performance/README.md create mode 100644 tests/performance/fixtures/subscriptions/perf-client-1.json create mode 100644 tests/performance/fixtures/subscriptions/perf-client-2.json create mode 100644 tests/performance/fixtures/subscriptions/perf-client-3.json create mode 100644 tests/performance/fixtures/subscriptions/perf-client-4.json delete mode 100644 tests/performance/helpers/cloudwatch.ts delete mode 100644 tests/performance/helpers/deployment.ts delete mode 100644 tests/performance/helpers/index.ts delete mode 100644 tests/performance/helpers/sqs.ts delete mode 100644 tests/performance/jest.config.ts delete mode 100644 tests/performance/lambda-throughput.test.ts diff --git a/infrastructure/terraform/components/callbacks/README.md b/infrastructure/terraform/components/callbacks/README.md index 4f666c27..02804698 100644 --- a/infrastructure/terraform/components/callbacks/README.md +++ b/infrastructure/terraform/components/callbacks/README.md @@ -20,6 +20,7 @@ | [component](#input\_component) | The variable encapsulating the name of this component | `string` | `"callbacks"` | no | | [default\_tags](#input\_default\_tags) | A map of default tags to apply to all taggable resources within the component | `map(string)` | 
`{}` | no | | [deploy\_mock\_clients](#input\_deploy\_mock\_clients) | Flag to deploy mock webhook lambda for integration testing (test/dev environments only) | `bool` | `false` | no | +| [deploy\_perf\_runner](#input\_deploy\_perf\_runner) | Flag to deploy the perf-runner lambda for performance testing (test/dev environments only) | `bool` | `false` | no | | [elasticache\_data\_storage\_maximum\_gb](#input\_elasticache\_data\_storage\_maximum\_gb) | Maximum data storage in GB for the ElastiCache Serverless delivery state cache | `number` | `1` | no | | [enable\_event\_anomaly\_detection](#input\_enable\_event\_anomaly\_detection) | Enable CloudWatch anomaly detection alarm for inbound event queue message reception | `bool` | `true` | no | | [enable\_xray\_tracing](#input\_enable\_xray\_tracing) | Enable AWS X-Ray active tracing for Lambda functions | `bool` | `false` | no | @@ -54,6 +55,7 @@ | [kms](#module\_kms) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-kms.zip | n/a | | [mock\_webhook\_lambda](#module\_mock\_webhook\_lambda) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-lambda.zip | n/a | | [mtls\_test\_certs\_bucket](#module\_mtls\_test\_certs\_bucket) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-s3bucket.zip | n/a | +| [perf\_runner\_lambda](#module\_perf\_runner\_lambda) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-lambda.zip | n/a | | [sqs\_inbound\_event](#module\_sqs\_inbound\_event) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-sqs.zip | n/a | ## Outputs diff --git a/infrastructure/terraform/components/callbacks/module_perf_runner_lambda.tf b/infrastructure/terraform/components/callbacks/module_perf_runner_lambda.tf new file mode 100644 index 00000000..424294a8 --- /dev/null +++ 
b/infrastructure/terraform/components/callbacks/module_perf_runner_lambda.tf @@ -0,0 +1,102 @@ +module "perf_runner_lambda" { + count = var.deploy_perf_runner ? 1 : 0 + source = "https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-lambda.zip" + + function_name = "perf-runner" + description = "Lambda function that executes performance tests against the client callbacks pipeline from within AWS" + + aws_account_id = var.aws_account_id + component = var.component + environment = var.environment + project = var.project + region = var.region + group = var.group + + log_retention_in_days = var.log_retention_in_days + kms_key_arn = module.kms.key_arn + + iam_policy_document = { + body = data.aws_iam_policy_document.perf_runner_lambda[0].json + } + + function_s3_bucket = local.acct.s3_buckets["lambda_function_artefacts"]["id"] + function_code_base_path = local.aws_lambda_functions_dir_path + function_code_dir = "perf-runner-lambda/dist" + handler_function_name = "handler" + runtime = "nodejs22.x" + memory = 512 + timeout = 900 + + log_level = var.log_level + force_lambda_code_deploy = var.force_lambda_code_deploy + enable_lambda_insights = false + enable_xray_tracing = false + + log_destination_arn = local.log_destination_arn + log_subscription_role_arn = local.acct.log_subscription_role_arn + + lambda_env_vars = { + ENVIRONMENT = var.environment + INBOUND_QUEUE_URL = module.sqs_inbound_event.sqs_queue_url + TRANSFORM_FILTER_LOG_GROUP = module.client_transform_filter_lambda.cloudwatch_log_group_name + DELIVERY_LOG_GROUP_PREFIX = "/aws/lambda/${local.csi}-https-client-" + } +} + +data "aws_iam_policy_document" "perf_runner_lambda" { + count = var.deploy_perf_runner ? 
1 : 0 + + statement { + sid = "KMSPermissions" + effect = "Allow" + + actions = [ + "kms:Decrypt", + "kms:GenerateDataKey", + ] + + resources = [ + module.kms.key_arn, + ] + } + + statement { + sid = "SQSSendMessage" + effect = "Allow" + + actions = [ + "sqs:SendMessage", + "sqs:SendMessageBatch", + ] + + resources = [ + module.sqs_inbound_event.sqs_queue_arn, + ] + } + + statement { + sid = "CloudWatchLogsInsightsQuery" + effect = "Allow" + + actions = [ + "logs:StartQuery", + "logs:StopQuery", + ] + + resources = [ + "arn:aws:logs:${var.region}:${var.aws_account_id}:log-group:${module.client_transform_filter_lambda.cloudwatch_log_group_name}:*", + "arn:aws:logs:${var.region}:${var.aws_account_id}:log-group:/aws/lambda/${local.csi}-https-client-*", + ] + } + + statement { + sid = "CloudWatchLogsInsightsResults" + effect = "Allow" + + actions = [ + "logs:GetQueryResults", + ] + + resources = ["*"] + } +} diff --git a/infrastructure/terraform/components/callbacks/pre.sh b/infrastructure/terraform/components/callbacks/pre.sh index bba7c1b5..39eb0817 100755 --- a/infrastructure/terraform/components/callbacks/pre.sh +++ b/infrastructure/terraform/components/callbacks/pre.sh @@ -6,17 +6,21 @@ script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # shellcheck source=_paths.sh source "${script_dir}/_paths.sh" -# Resolve deploy_mock_clients from tfvars; base_path/group/region/environment are in scope from terraform.sh +# Resolve deploy_mock_clients and deploy_perf_runner from tfvars; base_path/group/region/environment are in scope from terraform.sh deploy_mock_clients="false" +deploy_perf_runner="false" for _tfvar_file in \ "${base_path}/etc/group_${group}.tfvars" \ "${base_path}/etc/env_${region}_${environment}.tfvars"; do if [[ -f "${_tfvar_file}" ]]; then _val=$(grep -E '^\s*deploy_mock_clients\s*=' "${_tfvar_file}" | tail -1 | sed 's/.*=\s*//;s/\s*$//') - [[ -n "${_val}" ]] && deploy_mock_clients="${_val}" + [ -n "${_val}" ] && deploy_mock_clients="${_val}" + 
_val=$(grep -E '^\s*deploy_perf_runner\s*=' "${_tfvar_file}" | tail -1 | sed 's/.*=\s*//;s/\s*$//') + [ -n "${_val}" ] && deploy_perf_runner="${_val}" fi done echo "deploy_mock_clients resolved to: ${deploy_mock_clients}" +echo "deploy_perf_runner resolved to: ${deploy_perf_runner}" pnpm install --frozen-lockfile @@ -24,16 +28,14 @@ pnpm run generate-dependencies "${script_dir}/sync-client-config.sh" -if [[ "${deploy_mock_clients}" == "true" ]]; then - shopt -s nullglob - existing_configs=("${clients_dir}"/*.json) - shopt -u nullglob - if [[ "${#existing_configs[@]}" -eq 0 ]]; then - cp "${bounded_context_root}/tests/integration/fixtures/subscriptions/"*.json "${clients_dir}/" - echo "Copied mock client subscription config fixtures into clients dir" - else - echo "Client configs already present from S3 sync; skipping fixture copy" - fi +if [ "${deploy_mock_clients}" == "true" ]; then + cp "${bounded_context_root}/tests/integration/fixtures/subscriptions/"*.json "${clients_dir}/" + echo "Copied mock client subscription config fixtures into clients dir" +fi + +if [ "${deploy_perf_runner}" == "true" ]; then + cp "${bounded_context_root}/tests/performance/fixtures/subscriptions/"*.json "${clients_dir}/" + echo "Copied perf client subscription config fixtures into clients dir" fi pnpm run --recursive --if-present lambda-build diff --git a/infrastructure/terraform/components/callbacks/variables.tf b/infrastructure/terraform/components/callbacks/variables.tf index 0f2fc202..9c71492d 100644 --- a/infrastructure/terraform/components/callbacks/variables.tf +++ b/infrastructure/terraform/components/callbacks/variables.tf @@ -155,6 +155,12 @@ variable "deploy_mock_clients" { default = false } +variable "deploy_perf_runner" { + type = bool + description = "Flag to deploy the perf-runner lambda for performance testing (test/dev environments only)" + default = false +} + variable "enable_xray_tracing" { type = bool description = "Enable AWS X-Ray active tracing for Lambda 
functions" diff --git a/knip.ts b/knip.ts index fdb7d0f7..f8612f70 100644 --- a/knip.ts +++ b/knip.ts @@ -38,6 +38,9 @@ const config: KnipConfig = { "lambdas/mock-webhook-lambda": { ignoreDependencies: ["@tsconfig/node22"], }, + "lambdas/perf-runner-lambda": { + ignoreDependencies: ["@tsconfig/node22", "@types/aws-lambda"], + }, "src/config-cache": { ignoreDependencies: ["@tsconfig/node22"], }, @@ -59,9 +62,6 @@ const config: KnipConfig = { "async-wait-until", ], }, - "tests/performance": { - ignoreDependencies: ["@tsconfig/node22"], - }, "tests/test-support": { ignoreDependencies: ["@tsconfig/node22"], }, diff --git a/lambdas/perf-runner-lambda/jest.config.ts b/lambdas/perf-runner-lambda/jest.config.ts new file mode 100644 index 00000000..218d8ffd --- /dev/null +++ b/lambdas/perf-runner-lambda/jest.config.ts @@ -0,0 +1,13 @@ +import { nodeJestConfig } from "../../jest.config.base.ts"; + +export default { + ...nodeJestConfig, + coverageThreshold: { + global: { + ...nodeJestConfig.coverageThreshold?.global, + branches: 100, + lines: 100, + statements: 100, + }, + }, +}; diff --git a/tests/performance/package.json b/lambdas/perf-runner-lambda/package.json similarity index 60% rename from tests/performance/package.json rename to lambdas/perf-runner-lambda/package.json index 5e2f6c2f..9f9d01d8 100644 --- a/tests/performance/package.json +++ b/lambdas/perf-runner-lambda/package.json @@ -1,30 +1,32 @@ { + "name": "nhs-notify-perf-runner-lambda", + "version": "0.0.1", + "private": true, "engines": { "node": ">=24.14.1" }, - "name": "nhs-notify-client-callbacks-performance-tests", - "version": "0.0.1", - "private": true, "scripts": { - "test:performance": "jest", - "test:unit": "echo 'No unit tests in performance workspace - skipping'", + "lambda-build": "rm -rf dist && pnpm exec esbuild --bundle --minify --sourcemap --target=es2020 --platform=node --entry-names=[name] --outdir=dist src/index.ts", "lint": "eslint .", "lint:fix": "eslint . 
--fix", + "test:unit": "jest", "typecheck": "tsc --noEmit" }, "dependencies": { "@aws-sdk/client-cloudwatch-logs": "catalog:aws", "@aws-sdk/client-sqs": "catalog:aws", + "@nhs-notify-client-callbacks/logger": "workspace:*", "@nhs-notify-client-callbacks/models": "workspace:*", - "@nhs-notify-client-callbacks/test-support": "workspace:*", - "async-wait-until": "catalog:app" + "esbuild": "catalog:tools" }, "devDependencies": { "@tsconfig/node22": "catalog:tools", + "@types/aws-lambda": "catalog:tools", "@types/jest": "catalog:test", "@types/node": "catalog:tools", "eslint": "catalog:lint", "jest": "catalog:test", + "ts-jest": "catalog:test", "typescript": "catalog:tools" } } diff --git a/lambdas/perf-runner-lambda/src/__tests__/cloudwatch.test.ts b/lambdas/perf-runner-lambda/src/__tests__/cloudwatch.test.ts new file mode 100644 index 00000000..055ac7bc --- /dev/null +++ b/lambdas/perf-runner-lambda/src/__tests__/cloudwatch.test.ts @@ -0,0 +1,287 @@ +import type { CloudWatchLogsClient } from "@aws-sdk/client-cloudwatch-logs"; +import { queryDeliveryMetricsSnapshot, queryMetricsSnapshot } from "cloudwatch"; + +const mockCloudWatchClient = { + send: jest.fn(), +} as unknown as jest.Mocked; + +beforeEach(() => { + jest.useFakeTimers(); +}); + +afterEach(() => { + jest.useRealTimers(); +}); + +describe("queryMetricsSnapshot", () => { + it("returns null when StartQuery returns no queryId", async () => { + mockCloudWatchClient.send.mockResolvedValueOnce({} as never); + + const result = await queryMetricsSnapshot( + mockCloudWatchClient, + "/aws/lambda/nhs-dev-callbacks-client-transform-filter", + 1_700_000_000, + 1_700_000_060, + ); + + expect(result).toBeNull(); + }); + + it("returns null when the query status is Failed", async () => { + mockCloudWatchClient.send + .mockResolvedValueOnce({ queryId: "qid-1" } as never) + .mockResolvedValueOnce({ status: "Failed" } as never); + + const promise = queryMetricsSnapshot( + mockCloudWatchClient, + "/aws/lambda/test", + 0, + 60, + 
); + + await jest.runAllTimersAsync(); + const result = await promise; + + expect(result).toBeNull(); + }); + + it("returns null when the query status is Cancelled", async () => { + mockCloudWatchClient.send + .mockResolvedValueOnce({ queryId: "qid-2" } as never) + .mockResolvedValueOnce({ status: "Cancelled" } as never); + + const promise = queryMetricsSnapshot( + mockCloudWatchClient, + "/aws/lambda/test", + 0, + 60, + ); + + await jest.runAllTimersAsync(); + const result = await promise; + + expect(result).toBeNull(); + }); + + it("returns a snapshot with zeroed metrics when the result row is empty", async () => { + mockCloudWatchClient.send + .mockResolvedValueOnce({ queryId: "qid-3" } as never) + .mockResolvedValueOnce({ status: "Complete", results: [] } as never); + + const promise = queryMetricsSnapshot( + mockCloudWatchClient, + "/aws/lambda/test", + 0, + 60, + ); + + await jest.runAllTimersAsync(); + const result = await promise; + + expect(result).toMatchObject({ p50Ms: 0, p95Ms: 0, p99Ms: 0, count: 0 }); + expect(result?.snapshotAt).toBeGreaterThan(0); + }); + + it("returns a populated snapshot when query completes successfully", async () => { + mockCloudWatchClient.send + .mockResolvedValueOnce({ queryId: "qid-4" } as never) + .mockResolvedValueOnce({ + status: "Complete", + results: [ + [ + { field: "eventCount", value: "500" }, + { field: "p50", value: "42" }, + { field: "p95", value: "120" }, + { field: "p99", value: "250" }, + ], + ], + } as never); + + const promise = queryMetricsSnapshot( + mockCloudWatchClient, + "/aws/lambda/test", + 0, + 60, + ); + + await jest.runAllTimersAsync(); + const result = await promise; + + expect(result).toMatchObject({ + count: 500, + p50Ms: 42, + p95Ms: 120, + p99Ms: 250, + }); + }); + + it("polls until the query becomes Complete", async () => { + mockCloudWatchClient.send + .mockResolvedValueOnce({ queryId: "qid-5" } as never) + .mockResolvedValueOnce({ status: "Running" } as never) + .mockResolvedValueOnce({ 
status: "Running" } as never) + .mockResolvedValueOnce({ + status: "Complete", + results: [[{ field: "eventCount", value: "10" }]], + } as never); + + const promise = queryMetricsSnapshot( + mockCloudWatchClient, + "/aws/lambda/test", + 0, + 60, + ); + + await jest.runAllTimersAsync(); + const result = await promise; + + expect(result?.count).toBe(10); + expect(mockCloudWatchClient.send).toHaveBeenCalledTimes(4); + }); + + it("returns null when the query does not complete within the timeout", async () => { + mockCloudWatchClient.send + .mockResolvedValueOnce({ queryId: "qid-6" } as never) + .mockResolvedValue({ status: "Running" } as never); + + const promise = queryMetricsSnapshot( + mockCloudWatchClient, + "/aws/lambda/test", + 0, + 60, + ); + + await jest.advanceTimersByTimeAsync(60_000); + const result = await promise; + + expect(result).toBeNull(); + }); +}); + +describe("queryDeliveryMetricsSnapshot", () => { + it("returns null when logGroupNames is empty", async () => { + const result = await queryDeliveryMetricsSnapshot( + mockCloudWatchClient, + [], + 0, + 60, + ); + + expect(result).toBeNull(); + expect(mockCloudWatchClient.send).not.toHaveBeenCalled(); + }); + + it("returns null when StartQuery returns no queryId", async () => { + mockCloudWatchClient.send.mockResolvedValueOnce({} as never); + + const result = await queryDeliveryMetricsSnapshot( + mockCloudWatchClient, + ["/aws/lambda/test-https-client-perf-client-1"], + 0, + 60, + ); + + expect(result).toBeNull(); + }); + + it("sends logGroupNames to StartQuery", async () => { + mockCloudWatchClient.send + .mockResolvedValueOnce({ queryId: "qid-d1" } as never) + .mockResolvedValueOnce({ status: "Complete", results: [] } as never); + + const logGroups = [ + "/aws/lambda/test-https-client-perf-client-1", + "/aws/lambda/test-https-client-perf-client-2", + ]; + + const promise = queryDeliveryMetricsSnapshot( + mockCloudWatchClient, + logGroups, + 0, + 60, + ); + + await jest.runAllTimersAsync(); + await 
promise; + + const startCmd = mockCloudWatchClient.send.mock.calls[0][0] as { + input: { logGroupNames: string[] }; + }; + expect(startCmd.input.logGroupNames).toEqual(logGroups); + }); + + it("returns a snapshot with zeroed metrics when the result row is empty", async () => { + mockCloudWatchClient.send + .mockResolvedValueOnce({ queryId: "qid-d2" } as never) + .mockResolvedValueOnce({ status: "Complete", results: [] } as never); + + const promise = queryDeliveryMetricsSnapshot( + mockCloudWatchClient, + ["/aws/lambda/test-https-client-perf-client-1"], + 0, + 60, + ); + + await jest.runAllTimersAsync(); + const result = await promise; + + expect(result).toMatchObject({ + deliveryCount: 0, + p50Ms: 0, + p95Ms: 0, + p99Ms: 0, + }); + expect(result?.snapshotAt).toBeGreaterThan(0); + }); + + it("returns a populated snapshot when query completes successfully", async () => { + mockCloudWatchClient.send + .mockResolvedValueOnce({ queryId: "qid-d3" } as never) + .mockResolvedValueOnce({ + status: "Complete", + results: [ + [ + { field: "deliveryCount", value: "200" }, + { field: "p50", value: "85" }, + { field: "p95", value: "250" }, + { field: "p99", value: "450" }, + ], + ], + } as never); + + const promise = queryDeliveryMetricsSnapshot( + mockCloudWatchClient, + ["/aws/lambda/test-https-client-perf-client-1"], + 0, + 60, + ); + + await jest.runAllTimersAsync(); + const result = await promise; + + expect(result).toMatchObject({ + deliveryCount: 200, + p50Ms: 85, + p95Ms: 250, + p99Ms: 450, + }); + }); + + it("returns null when the query status is Failed", async () => { + mockCloudWatchClient.send + .mockResolvedValueOnce({ queryId: "qid-d4" } as never) + .mockResolvedValueOnce({ status: "Failed" } as never); + + const promise = queryDeliveryMetricsSnapshot( + mockCloudWatchClient, + ["/aws/lambda/test-https-client-perf-client-1"], + 0, + 60, + ); + + await jest.runAllTimersAsync(); + const result = await promise; + + expect(result).toBeNull(); + }); +}); diff --git 
a/lambdas/perf-runner-lambda/src/__tests__/event-factories.test.ts b/lambdas/perf-runner-lambda/src/__tests__/event-factories.test.ts new file mode 100644 index 00000000..1c877a17 --- /dev/null +++ b/lambdas/perf-runner-lambda/src/__tests__/event-factories.test.ts @@ -0,0 +1,68 @@ +import { EventTypes } from "@nhs-notify-client-callbacks/models"; +import { + createChannelStatusEvent, + createEvent, + createMessageStatusEvent, +} from "event-factories"; + +describe("createMessageStatusEvent", () => { + it("creates a valid message status CloudEvent with the given clientId and status", () => { + const event = createMessageStatusEvent("perf-client-1", "DELIVERED"); + + expect(event.specversion).toBe("1.0"); + expect(event.type).toBe(EventTypes.MESSAGE_STATUS_PUBLISHED); + expect(event.datacontenttype).toBe("application/json"); + expect(event.data.clientId).toBe("perf-client-1"); + expect(event.data.messageStatus).toBe("DELIVERED"); + expect(event.data.messageId).toBeTruthy(); + expect(event.id).toBeTruthy(); + }); + + it("assigns a unique id and messageId on each call", () => { + const a = createMessageStatusEvent("perf-client-1", "FAILED"); + const b = createMessageStatusEvent("perf-client-1", "FAILED"); + + expect(a.id).not.toBe(b.id); + expect(a.data.messageId).not.toBe(b.data.messageId); + }); +}); + +describe("createChannelStatusEvent", () => { + it("creates a valid channel status CloudEvent with the given clientId and status", () => { + const event = createChannelStatusEvent("perf-client-2", "DELIVERED"); + + expect(event.specversion).toBe("1.0"); + expect(event.type).toBe(EventTypes.CHANNEL_STATUS_PUBLISHED); + expect(event.datacontenttype).toBe("application/json"); + expect(event.data.clientId).toBe("perf-client-2"); + expect(event.data.channelStatus).toBe("DELIVERED"); + expect(event.data.messageId).toBeTruthy(); + expect(event.id).toBeTruthy(); + }); +}); + +describe("createEvent", () => { + it("delegates to createMessageStatusEvent for messageStatus factory 
entries", () => { + const event = createEvent({ + weight: 1, + factory: "messageStatus", + clientId: "perf-client-1", + messageStatus: "SENDING", + }); + + expect(event.type).toBe(EventTypes.MESSAGE_STATUS_PUBLISHED); + expect(event.data.clientId).toBe("perf-client-1"); + }); + + it("delegates to createChannelStatusEvent for channelStatus factory entries", () => { + const event = createEvent({ + weight: 1, + factory: "channelStatus", + clientId: "perf-client-2", + channelStatus: "FAILED", + }); + + expect(event.type).toBe(EventTypes.CHANNEL_STATUS_PUBLISHED); + expect(event.data.clientId).toBe("perf-client-2"); + }); +}); diff --git a/lambdas/perf-runner-lambda/src/__tests__/index.test.ts b/lambdas/perf-runner-lambda/src/__tests__/index.test.ts new file mode 100644 index 00000000..1d1a501a --- /dev/null +++ b/lambdas/perf-runner-lambda/src/__tests__/index.test.ts @@ -0,0 +1,122 @@ +import { handler } from "index"; +import type { PerformanceResult } from "types"; +import { DEFAULT_SCENARIO } from "scenario"; + +import { runPerformanceTest } from "runner"; + +jest.mock("@aws-sdk/client-sqs", () => ({ + SQSClient: jest.fn(() => ({ destroy: jest.fn() })), +})); + +jest.mock("@aws-sdk/client-cloudwatch-logs", () => ({ + CloudWatchLogsClient: jest.fn(() => ({ destroy: jest.fn() })), +})); + +jest.mock("runner"); +jest.mock("@nhs-notify-client-callbacks/logger", () => ({ + Logger: jest.fn(() => ({ + info: jest.fn(), + error: jest.fn(), + })), +})); + +const mockRunPerformanceTest = runPerformanceTest as jest.MockedFunction< + typeof runPerformanceTest +>; + +const mockResult: PerformanceResult = { + testId: "test-id", + scenario: DEFAULT_SCENARIO, + startedAt: "2026-04-09T10:00:00.000Z", + completedAt: "2026-04-09T10:02:00.000Z", + phases: [], + metrics: [], + deliveryMetrics: [], +}; + +beforeEach(() => { + jest.clearAllMocks(); + mockRunPerformanceTest.mockResolvedValue(mockResult); + process.env.INBOUND_QUEUE_URL = "https://sqs.example.invalid/queue"; + 
process.env.TRANSFORM_FILTER_LOG_GROUP = + "/aws/lambda/nhs-dev-callbacks-client-transform-filter"; + process.env.DELIVERY_LOG_GROUP_PREFIX = + "/aws/lambda/nhs-dev-callbacks-https-client-"; + process.env.AWS_REGION = "eu-west-2"; +}); + +describe("handler", () => { + it("calls runPerformanceTest with the provided testId and scenario", async () => { + const result = await handler({ testId: "test-id" }); + + expect(result).toEqual(mockResult); + expect(mockRunPerformanceTest).toHaveBeenCalledWith( + expect.objectContaining({ + queueUrl: "https://sqs.example.invalid/queue", + logGroupName: "/aws/lambda/nhs-dev-callbacks-client-transform-filter", + deliveryLogGroupPrefix: "/aws/lambda/nhs-dev-callbacks-https-client-", + }), + DEFAULT_SCENARIO, + "test-id", + ); + }); + + it("uses a custom scenario when one is provided in the event", async () => { + const customScenario = { + ...DEFAULT_SCENARIO, + phases: [{ durationSecs: 5, targetEps: 500 }], + }; + + await handler({ testId: "custom-test", scenario: customScenario }); + + expect(mockRunPerformanceTest).toHaveBeenCalledWith( + expect.anything(), + customScenario, + "custom-test", + ); + }); + + it("destroys AWS clients even when runPerformanceTest throws", async () => { + const { SQSClient } = jest.requireMock("@aws-sdk/client-sqs"); + const mockDestroy = jest.fn(); + SQSClient.mockReturnValue({ destroy: mockDestroy }); + + mockRunPerformanceTest.mockRejectedValue(new Error("test failure")); + + await expect(handler({ testId: "failing-test" })).rejects.toThrow( + "test failure", + ); + expect(mockDestroy).toHaveBeenCalled(); + }); + + it("throws when INBOUND_QUEUE_URL is missing", async () => { + delete process.env.INBOUND_QUEUE_URL; + + await expect(handler({ testId: "missing-queue-test" })).rejects.toThrow( + "Missing required environment variable: INBOUND_QUEUE_URL", + ); + }); + + it("throws when TRANSFORM_FILTER_LOG_GROUP is missing", async () => { + delete process.env.TRANSFORM_FILTER_LOG_GROUP; + delete 
process.env.AWS_REGION; + + await expect(handler({ testId: "missing-log-group-test" })).rejects.toThrow( + "Missing required environment variable: TRANSFORM_FILTER_LOG_GROUP", + ); + }); + + it("passes undefined deliveryLogGroupPrefix when env var is not set", async () => { + delete process.env.DELIVERY_LOG_GROUP_PREFIX; + + await handler({ testId: "no-prefix-test" }); + + expect(mockRunPerformanceTest).toHaveBeenCalledWith( + expect.objectContaining({ + deliveryLogGroupPrefix: undefined, + }), + DEFAULT_SCENARIO, + "no-prefix-test", + ); + }); +}); diff --git a/lambdas/perf-runner-lambda/src/__tests__/runner.test.ts b/lambdas/perf-runner-lambda/src/__tests__/runner.test.ts new file mode 100644 index 00000000..1cf5f3a3 --- /dev/null +++ b/lambdas/perf-runner-lambda/src/__tests__/runner.test.ts @@ -0,0 +1,323 @@ +import type { SQSClient } from "@aws-sdk/client-sqs"; +import type { CloudWatchLogsClient } from "@aws-sdk/client-cloudwatch-logs"; +import type { + DeliveryMetricsSnapshot, + MetricsSnapshot, + PhaseResult, + RunnerDeps, + Scenario, +} from "types"; +import { defaultSleep, runPerformanceTest } from "runner"; + +import { generatePhaseLoad } from "sqs"; +import { queryDeliveryMetricsSnapshot, queryMetricsSnapshot } from "cloudwatch"; + +jest.mock("sqs"); +jest.mock("cloudwatch"); + +const mockGeneratePhaseLoad = jest.mocked(generatePhaseLoad); +const mockQueryMetricsSnapshot = jest.mocked(queryMetricsSnapshot); +const mockQueryDeliveryMetricsSnapshot = jest.mocked( + queryDeliveryMetricsSnapshot, +); + +const immediateSleep = jest.fn().mockResolvedValue(undefined); + +const mockPhaseResult: PhaseResult = { + targetEps: 1000, + achievedEps: 980, + sent: 1000, + durationMs: 1020, +}; + +const mockSnapshot: MetricsSnapshot = { + snapshotAt: Date.now(), + p50Ms: 30, + p95Ms: 80, + p99Ms: 150, + count: 100, +}; + +const mockDeliverySnapshot: DeliveryMetricsSnapshot = { + snapshotAt: Date.now(), + deliveryCount: 50, + p50Ms: 120, + p95Ms: 300, + p99Ms: 500, +}; + 
+const scenario: Scenario = { + phases: [{ durationSecs: 1, targetEps: 1000 }], + eventMix: [ + { + weight: 1, + factory: "messageStatus", + clientId: "perf-client-1", + messageStatus: "DELIVERED", + }, + ], + metricsIntervalSecs: 1, +}; + +const deps: RunnerDeps = { + sqsClient: {} as SQSClient, + cloudWatchClient: {} as CloudWatchLogsClient, + queueUrl: "https://sqs.example.invalid/queue", + logGroupName: "/aws/lambda/nhs-dev-callbacks-client-transform-filter", + deliveryLogGroupPrefix: "/aws/lambda/nhs-dev-callbacks-https-client-", +}; + +beforeEach(() => { + jest.clearAllMocks(); + mockGeneratePhaseLoad.mockResolvedValue(mockPhaseResult); + mockQueryDeliveryMetricsSnapshot.mockResolvedValue(null); + immediateSleep.mockResolvedValue(undefined); +}); + +describe("runPerformanceTest", () => { + it("returns a PerformanceResult with phase results and snapshots from polling and final query", async () => { + mockQueryMetricsSnapshot.mockResolvedValue(mockSnapshot); + mockQueryDeliveryMetricsSnapshot.mockResolvedValue(mockDeliverySnapshot); + + const result = await runPerformanceTest( + deps, + scenario, + "test-id-1", + immediateSleep, + ); + + expect(result.testId).toBe("test-id-1"); + expect(result.scenario).toBe(scenario); + expect(result.phases).toHaveLength(1); + expect(result.phases[0]).toEqual(mockPhaseResult); + expect(result.metrics).toHaveLength(2); // one mid-test, one final + expect(result.deliveryMetrics).toHaveLength(2); // one mid-test, one final + expect(result.startedAt).toBeTruthy(); + expect(result.completedAt).toBeTruthy(); + }); + + it("excludes null snapshots from the metrics array", async () => { + mockQueryMetricsSnapshot + .mockResolvedValueOnce(null) // mid-test poll returns null + .mockResolvedValueOnce(mockSnapshot); // final query returns snapshot + + const result = await runPerformanceTest( + deps, + scenario, + "test-id-2", + immediateSleep, + ); + + expect(result.metrics).toHaveLength(1); + 
expect(result.metrics[0]).toEqual(mockSnapshot); + expect(result.deliveryMetrics).toHaveLength(0); + }); + + it("produces an empty metrics array when all queries return null", async () => { + mockQueryMetricsSnapshot.mockResolvedValue(null); + + const result = await runPerformanceTest( + deps, + scenario, + "test-id-3", + immediateSleep, + ); + + expect(result.metrics).toHaveLength(0); + expect(result.deliveryMetrics).toHaveLength(0); + }); + + it("runs all phases and collects each result", async () => { + const multiPhaseScenario: Scenario = { + ...scenario, + phases: [ + { durationSecs: 1, targetEps: 500 }, + { durationSecs: 1, targetEps: 1000 }, + ], + }; + + const phase1Result = { ...mockPhaseResult, targetEps: 500 }; + const phase2Result = { ...mockPhaseResult, targetEps: 1000 }; + + mockGeneratePhaseLoad + .mockResolvedValueOnce(phase1Result) + .mockResolvedValueOnce(phase2Result); + mockQueryMetricsSnapshot.mockResolvedValue(null); + + const result = await runPerformanceTest( + deps, + multiPhaseScenario, + "test-id-4", + immediateSleep, + ); + + expect(result.phases).toHaveLength(2); + expect(result.phases[0]).toEqual(phase1Result); + expect(result.phases[1]).toEqual(phase2Result); + }); + + it("collects delivery metrics across multiple poll iterations", async () => { + let resolvePhase!: (value: PhaseResult) => void; + mockGeneratePhaseLoad.mockImplementation( + () => + new Promise((r) => { + resolvePhase = r; + }), + ); + mockQueryMetricsSnapshot.mockResolvedValue(mockSnapshot); + mockQueryDeliveryMetricsSnapshot.mockResolvedValue(mockDeliverySnapshot); + + let sleepCount = 0; + const controlledSleep = jest.fn(async () => { + sleepCount += 1; + if (sleepCount >= 3) { + resolvePhase(mockPhaseResult); + } + }); + + const result = await runPerformanceTest( + deps, + scenario, + "test-id-poll", + controlledSleep, + ); + + expect(result.deliveryMetrics.length).toBeGreaterThanOrEqual(1); + }); + + it("throws when scenario.eventMix is empty", async () => { + 
const emptyMixScenario: Scenario = { ...scenario, eventMix: [] }; + + await expect( + runPerformanceTest( + deps, + emptyMixScenario, + "empty-mix-test", + immediateSleep, + ), + ).rejects.toThrow("scenario.eventMix must contain at least one entry"); + }); + + it("throws when a phase has durationSecs of zero", async () => { + const badScenario: Scenario = { + ...scenario, + phases: [{ durationSecs: 0, targetEps: 1000 }], + }; + + await expect( + runPerformanceTest( + deps, + badScenario, + "zero-duration-test", + immediateSleep, + ), + ).rejects.toThrow("scenario.phases[0].durationSecs must be greater than 0"); + }); + + it("throws when a phase has targetEps of zero", async () => { + const badScenario: Scenario = { + ...scenario, + phases: [{ durationSecs: 1, targetEps: 0 }], + }; + + await expect( + runPerformanceTest(deps, badScenario, "zero-eps-test", immediateSleep), + ).rejects.toThrow("scenario.phases[0].targetEps must be greater than 0"); + }); + + it("throws when a later phase has an invalid value", async () => { + const badScenario: Scenario = { + ...scenario, + phases: [ + { durationSecs: 1, targetEps: 1000 }, + { durationSecs: 1, targetEps: 0 }, + ], + }; + + await expect( + runPerformanceTest(deps, badScenario, "later-phase-test", immediateSleep), + ).rejects.toThrow("scenario.phases[1].targetEps must be greater than 0"); + }); + + it("calls generatePhaseLoad with the correct phase and deps", async () => { + mockQueryMetricsSnapshot.mockResolvedValue(null); + + await runPerformanceTest(deps, scenario, "test-id-5", immediateSleep); + + expect(mockGeneratePhaseLoad).toHaveBeenCalledWith( + deps.sqsClient, + deps.queueUrl, + scenario.phases[0], + scenario.eventMix, + ); + }); + + it("skips delivery metrics when deliveryLogGroupPrefix is undefined", async () => { + const depsWithoutPrefix: RunnerDeps = { + ...deps, + deliveryLogGroupPrefix: undefined, + }; + mockQueryMetricsSnapshot.mockResolvedValue(mockSnapshot); + + const result = await 
runPerformanceTest( + depsWithoutPrefix, + scenario, + "test-id-6", + immediateSleep, + ); + + expect(mockQueryDeliveryMetricsSnapshot).not.toHaveBeenCalled(); + expect(result.deliveryMetrics).toHaveLength(0); + }); + + it("builds delivery log group names from prefix and event mix client IDs", async () => { + mockQueryMetricsSnapshot.mockResolvedValue(null); + mockQueryDeliveryMetricsSnapshot.mockResolvedValue(null); + + const multiClientScenario: Scenario = { + ...scenario, + eventMix: [ + { + weight: 1, + factory: "messageStatus", + clientId: "perf-client-1", + messageStatus: "DELIVERED", + }, + { + weight: 1, + factory: "channelStatus", + clientId: "perf-client-2", + channelStatus: "DELIVERED", + }, + ], + }; + + await runPerformanceTest( + deps, + multiClientScenario, + "test-id-7", + immediateSleep, + ); + + expect(mockQueryDeliveryMetricsSnapshot).toHaveBeenCalledWith( + deps.cloudWatchClient, + expect.arrayContaining([ + "/aws/lambda/nhs-dev-callbacks-https-client-perf-client-1", + "/aws/lambda/nhs-dev-callbacks-https-client-perf-client-2", + ]), + expect.any(Number), + expect.any(Number), + ); + }); +}); + +describe("defaultSleep", () => { + beforeEach(() => jest.useFakeTimers()); + afterEach(() => jest.useRealTimers()); + + it("resolves after the specified delay", async () => { + const promise = defaultSleep(500); + await jest.advanceTimersByTimeAsync(500); + await expect(promise).resolves.toBeUndefined(); + }); +}); diff --git a/lambdas/perf-runner-lambda/src/__tests__/sqs.test.ts b/lambdas/perf-runner-lambda/src/__tests__/sqs.test.ts new file mode 100644 index 00000000..63ab41df --- /dev/null +++ b/lambdas/perf-runner-lambda/src/__tests__/sqs.test.ts @@ -0,0 +1,141 @@ +import type { SQSClient } from "@aws-sdk/client-sqs"; +import type { EventMixEntry, Phase } from "types"; +import { generatePhaseLoad, selectWeighted, sendSqsBatch } from "sqs"; + +jest.mock("event-factories", () => ({ + createEvent: jest.fn(() => ({ + specversion: "1.0", + id: 
"mock-event-id", + type: "mock.type", + data: {}, + })), +})); + +const mockSqsClient = { + send: jest.fn(), +} as unknown as jest.Mocked; + +beforeEach(() => { + mockSqsClient.send.mockResolvedValue({} as never); +}); + +describe("selectWeighted", () => { + it("returns the only entry when there is one", () => { + const entries = [{ weight: 1, value: "a" }]; + const result = selectWeighted(entries); + expect(result).toBe(entries[0]); + }); + + it("distributes selections according to weight over many draws", () => { + const entries = [ + { weight: 9, label: "heavy" }, + { weight: 1, label: "light" }, + ]; + + const counts = { heavy: 0, light: 0 }; + for (let i = 0; i < 1000; i += 1) { + const selected = selectWeighted(entries); + counts[selected.label as keyof typeof counts] += 1; + } + + expect(counts.heavy).toBeGreaterThan(counts.light); + }); + + it("returns the last entry via fallback when no earlier entry matches", () => { + // With Math.random = 0.5, remaining = 0.5 * 10 = 5. + // First entry has weight 1; 5 - 1 = 4 > 0, so loop skips it. + // Fallback returns the last entry. 
+ jest.spyOn(Math, "random").mockReturnValue(0.5); + const entries = [ + { weight: 1, label: "light" }, + { weight: 9, label: "heavy" }, + ]; + + const result = selectWeighted(entries); + expect(result.label).toBe("heavy"); + jest.restoreAllMocks(); + }); +}); + +describe("sendSqsBatch", () => { + it("sends a SendMessageBatchCommand with serialised event bodies", async () => { + const events = [ + { specversion: "1.0", id: "a", type: "t", data: {} }, + { specversion: "1.0", id: "b", type: "t", data: {} }, + ] as never[]; + + await sendSqsBatch( + mockSqsClient, + "https://sqs.example.invalid/queue", + events, + ); + + expect(mockSqsClient.send).toHaveBeenCalledTimes(1); + const command = mockSqsClient.send.mock.calls[0][0] as { + input: { + QueueUrl: string; + Entries: { Id: string; MessageBody: string }[]; + }; + }; + expect(command.input.QueueUrl).toBe("https://sqs.example.invalid/queue"); + expect(command.input.Entries).toHaveLength(2); + expect(command.input.Entries[0].Id).toBe("0"); + expect(JSON.parse(command.input.Entries[0].MessageBody)).toMatchObject({ + id: "a", + }); + }); +}); + +describe("generatePhaseLoad", () => { + it("returns a PhaseResult with sent count and timing", async () => { + const phase: Phase = { durationSecs: 1, targetEps: 10 }; + const eventMix: EventMixEntry[] = [ + { + weight: 1, + factory: "messageStatus", + clientId: "perf-client-1", + messageStatus: "DELIVERED", + }, + ]; + + const result = await generatePhaseLoad( + mockSqsClient, + "https://sqs.example.invalid/queue", + phase, + eventMix, + ); + + expect(result.targetEps).toBe(10); + expect(result.sent).toBeGreaterThan(0); + expect(result.durationMs).toBeGreaterThanOrEqual(0); + expect(result.achievedEps).toBeGreaterThan(0); + expect(mockSqsClient.send).toHaveBeenCalled(); + }); + + it("throttles between seconds when the wave completes early", async () => { + jest.useFakeTimers(); + + const phase: Phase = { durationSecs: 2, targetEps: 10 }; + const eventMix: EventMixEntry[] = [ + 
{ + weight: 1, + factory: "messageStatus", + clientId: "perf-client-1", + messageStatus: "DELIVERED", + }, + ]; + + const resultPromise = generatePhaseLoad( + mockSqsClient, + "https://sqs.example.invalid/queue", + phase, + eventMix, + ); + + await jest.runAllTimersAsync(); + const result = await resultPromise; + + expect(result.sent).toBeGreaterThan(0); + jest.useRealTimers(); + }); +}); diff --git a/lambdas/perf-runner-lambda/src/cloudwatch.ts b/lambdas/perf-runner-lambda/src/cloudwatch.ts new file mode 100644 index 00000000..206bec33 --- /dev/null +++ b/lambdas/perf-runner-lambda/src/cloudwatch.ts @@ -0,0 +1,110 @@ +import { + type CloudWatchLogsClient, + GetQueryResultsCommand, + StartQueryCommand, +} from "@aws-sdk/client-cloudwatch-logs"; +import type { DeliveryMetricsSnapshot, MetricsSnapshot } from "types"; + +const INSIGHTS_POLL_INTERVAL_MS = 2000; +const INSIGHTS_TIMEOUT_MS = 30_000; + +type ResultField = { field?: string; value?: string }; + +async function pollQueryResults( + client: CloudWatchLogsClient, + queryId: string, + mapRow: (row: ResultField[]) => T, +): Promise { + const zeroResult = mapRow([]); + const deadline = Date.now() + INSIGHTS_TIMEOUT_MS; + + while (Date.now() < deadline) { + await new Promise((resolve) => { + setTimeout(resolve, INSIGHTS_POLL_INTERVAL_MS); + }); + + const response = await client.send(new GetQueryResultsCommand({ queryId })); + + if (response.status === "Failed" || response.status === "Cancelled") { + return null; + } + + if (response.status === "Complete") { + const row = response.results?.[0]; + if (!row) return zeroResult; + return mapRow(row); + } + } + + return null; +} + +export async function queryMetricsSnapshot( + client: CloudWatchLogsClient, + logGroupName: string, + startTimeSec: number, + endTimeSec: number, +): Promise { + const { queryId } = await client.send( + new StartQueryCommand({ + logGroupName, + startTime: startTimeSec, + endTime: endTimeSec, + queryString: [ + 'filter msg = "Callback 
lifecycle: batch-processing-completed"', + "| stats count(*) as eventCount, pct(processingTimeMs, 50) as p50, pct(processingTimeMs, 95) as p95, pct(processingTimeMs, 99) as p99", + ].join("\n"), + }), + ); + + if (!queryId) return null; + + return pollQueryResults(client, queryId, (row) => { + const getField = (name: string): number => + Number(row.find((f) => f.field === name)?.value ?? 0); + + return { + snapshotAt: Date.now(), + p50Ms: getField("p50"), + p95Ms: getField("p95"), + p99Ms: getField("p99"), + count: getField("eventCount"), + }; + }); +} + +export async function queryDeliveryMetricsSnapshot( + client: CloudWatchLogsClient, + logGroupNames: string[], + startTimeSec: number, + endTimeSec: number, +): Promise { + if (logGroupNames.length === 0) return null; + + const { queryId } = await client.send( + new StartQueryCommand({ + logGroupNames, + startTime: startTimeSec, + endTime: endTimeSec, + queryString: [ + "filter ispresent(DeliveryDurationMs)", + "| stats count(DeliveryDurationMs) as deliveryCount, pct(DeliveryDurationMs, 50) as p50, pct(DeliveryDurationMs, 95) as p95, pct(DeliveryDurationMs, 99) as p99", + ].join("\n"), + }), + ); + + if (!queryId) return null; + + return pollQueryResults(client, queryId, (row) => { + const getField = (name: string): number => + Number(row.find((f) => f.field === name)?.value ?? 
0); + + return { + snapshotAt: Date.now(), + deliveryCount: getField("deliveryCount"), + p50Ms: getField("p50"), + p95Ms: getField("p95"), + p99Ms: getField("p99"), + }; + }); +} diff --git a/tests/performance/helpers/event-factories.ts b/lambdas/perf-runner-lambda/src/event-factories.ts similarity index 70% rename from tests/performance/helpers/event-factories.ts rename to lambdas/perf-runner-lambda/src/event-factories.ts index c31571e4..6f39add9 100644 --- a/tests/performance/helpers/event-factories.ts +++ b/lambdas/perf-runner-lambda/src/event-factories.ts @@ -1,22 +1,24 @@ import type { + ChannelStatus, ChannelStatusData, + MessageStatus, MessageStatusData, StatusPublishEvent, } from "@nhs-notify-client-callbacks/models"; import { EventTypes } from "@nhs-notify-client-callbacks/models"; +import type { EventMixEntry } from "types"; -export function createMessageStatusPublishEvent( - overrides?: Partial, +export function createMessageStatusEvent( + clientId: string, + messageStatus: MessageStatus, ): StatusPublishEvent { - const messageId = overrides?.messageId ?? crypto.randomUUID(); - const messageReference = - overrides?.messageReference ?? 
`ref-${crypto.randomUUID()}`; + const messageId = crypto.randomUUID(); const data: MessageStatusData = { - clientId: "mock-client-1", + clientId, messageId, - messageReference, - messageStatus: "DELIVERED", + messageReference: `ref-${crypto.randomUUID()}`, + messageStatus, channels: [{ type: "NHSAPP", channelStatus: "DELIVERED" }], timestamp: new Date().toISOString(), routingPlan: { @@ -25,7 +27,6 @@ export function createMessageStatusPublishEvent( version: "v1.0.0", createdDate: new Date().toISOString(), }, - ...overrides, }; return { @@ -43,26 +44,23 @@ export function createMessageStatusPublishEvent( }; } -export function createChannelStatusPublishEvent( - overrides?: Partial, +export function createChannelStatusEvent( + clientId: string, + channelStatus: ChannelStatus, ): StatusPublishEvent { - const messageId = overrides?.messageId ?? crypto.randomUUID(); - const messageReference = - overrides?.messageReference ?? `ref-${crypto.randomUUID()}`; + const messageId = crypto.randomUUID(); const data: ChannelStatusData = { - clientId: "mock-client-1", + clientId, messageId, - messageReference, + messageReference: `ref-${crypto.randomUUID()}`, channel: "NHSAPP", - channelStatus: "DELIVERED", - channelStatusDescription: "perf-test", + channelStatus, supplierStatus: "delivered", cascadeType: "primary", cascadeOrder: 0, timestamp: new Date().toISOString(), retryCount: 0, - ...overrides, }; return { @@ -79,3 +77,11 @@ export function createChannelStatusPublishEvent( data, }; } + +export function createEvent(entry: EventMixEntry): StatusPublishEvent { + if (entry.factory === "messageStatus") { + return createMessageStatusEvent(entry.clientId, entry.messageStatus); + } + + return createChannelStatusEvent(entry.clientId, entry.channelStatus); +} diff --git a/lambdas/perf-runner-lambda/src/index.ts b/lambdas/perf-runner-lambda/src/index.ts new file mode 100644 index 00000000..a0881866 --- /dev/null +++ b/lambdas/perf-runner-lambda/src/index.ts @@ -0,0 +1,55 @@ +import { 
CloudWatchLogsClient } from "@aws-sdk/client-cloudwatch-logs"; +import { SQSClient } from "@aws-sdk/client-sqs"; +import { Logger } from "@nhs-notify-client-callbacks/logger"; +import { runPerformanceTest } from "runner"; +import { DEFAULT_SCENARIO } from "scenario"; +import type { PerfRunnerPayload, PerformanceResult } from "types"; + +const logger = new Logger(); + +export async function handler( + event: PerfRunnerPayload, +): Promise { + const { scenario = DEFAULT_SCENARIO, testId } = event; + + const region = process.env.AWS_REGION ?? "eu-west-2"; + const queueUrl = process.env.INBOUND_QUEUE_URL; + const logGroupName = process.env.TRANSFORM_FILTER_LOG_GROUP; + const deliveryLogGroupPrefix = process.env.DELIVERY_LOG_GROUP_PREFIX; + + if (!queueUrl) { + throw new Error("Missing required environment variable: INBOUND_QUEUE_URL"); + } + + if (!logGroupName) { + throw new Error( + "Missing required environment variable: TRANSFORM_FILTER_LOG_GROUP", + ); + } + + const sqsClient = new SQSClient({ region }); + const cloudWatchClient = new CloudWatchLogsClient({ region }); + + logger.info("Performance test started", { testId }); + + try { + const result = await runPerformanceTest( + { + sqsClient, + cloudWatchClient, + queueUrl, + logGroupName, + deliveryLogGroupPrefix, + }, + scenario, + testId, + ); + + logger.info("Performance test completed", { testId }); + + return result; + } finally { + sqsClient.destroy(); + cloudWatchClient.destroy(); + } +} diff --git a/lambdas/perf-runner-lambda/src/runner.ts b/lambdas/perf-runner-lambda/src/runner.ts new file mode 100644 index 00000000..a265e90e --- /dev/null +++ b/lambdas/perf-runner-lambda/src/runner.ts @@ -0,0 +1,140 @@ +import type { + DeliveryMetricsSnapshot, + MetricsSnapshot, + PerformanceResult, + PhaseResult, + RunnerDeps, + Scenario, +} from "types"; +import { generatePhaseLoad } from "sqs"; +import { queryDeliveryMetricsSnapshot, queryMetricsSnapshot } from "cloudwatch"; + +const CLOUDWATCH_SETTLING_MS = 60_000; 
+ +export const defaultSleep = (ms: number): Promise => + new Promise((resolve) => { + setTimeout(resolve, ms); + }); + +function buildDeliveryLogGroupNames( + prefix: string | undefined, + scenario: Scenario, +): string[] { + if (!prefix) return []; + const clientIds = new Set(scenario.eventMix.map((e) => e.clientId)); + return [...clientIds].map((id) => `${prefix}${id}`); +} + +export async function runPerformanceTest( + deps: RunnerDeps, + scenario: Scenario, + testId: string, + sleepFn: (ms: number) => Promise = defaultSleep, +): Promise { + if (scenario.eventMix.length === 0) { + throw new Error("scenario.eventMix must contain at least one entry"); + } + + for (const [index, phase] of scenario.phases.entries()) { + if (phase.durationSecs <= 0) { + throw new Error( + `scenario.phases[${index}].durationSecs must be greater than 0`, + ); + } + if (phase.targetEps <= 0) { + throw new Error( + `scenario.phases[${index}].targetEps must be greater than 0`, + ); + } + } + + const testStartMs = Date.now(); + const startedAt = new Date(testStartMs).toISOString(); + const phaseResults: PhaseResult[] = []; + const snapshots: MetricsSnapshot[] = []; + const deliverySnapshots: DeliveryMetricsSnapshot[] = []; + let stopPolling = false; + + const deliveryLogGroupNames = buildDeliveryLogGroupNames( + deps.deliveryLogGroupPrefix, + scenario, + ); + + const pollLoop = async (): Promise => { + await sleepFn(scenario.metricsIntervalSecs * 1000); + while (!stopPolling) { + const startSec = Math.floor(testStartMs / 1000); + const endSec = Math.floor(Date.now() / 1000); + + const snap = await queryMetricsSnapshot( + deps.cloudWatchClient, + deps.logGroupName, + startSec, + endSec, + ); + if (snap !== null) snapshots.push(snap); + + if (deliveryLogGroupNames.length > 0) { + const deliverySnap = await queryDeliveryMetricsSnapshot( + deps.cloudWatchClient, + deliveryLogGroupNames, + startSec, + endSec, + ); + if (deliverySnap !== null) deliverySnapshots.push(deliverySnap); + } + + if 
(!stopPolling) { + await sleepFn(scenario.metricsIntervalSecs * 1000); + } + } + }; + + const pollPromise = pollLoop(); + + for (const phase of scenario.phases) { + const result = await generatePhaseLoad( + deps.sqsClient, + deps.queueUrl, + phase, + scenario.eventMix, + ); + phaseResults.push(result); + } + + stopPolling = true; + await pollPromise; + + await sleepFn(CLOUDWATCH_SETTLING_MS); + + const finalStartSec = Math.floor(testStartMs / 1000); + const finalEndSec = Math.floor(Date.now() / 1000); + + const finalSnap = await queryMetricsSnapshot( + deps.cloudWatchClient, + deps.logGroupName, + finalStartSec, + finalEndSec, + ); + if (finalSnap !== null) snapshots.push(finalSnap); + + if (deliveryLogGroupNames.length > 0) { + const finalDeliverySnap = await queryDeliveryMetricsSnapshot( + deps.cloudWatchClient, + deliveryLogGroupNames, + finalStartSec, + finalEndSec, + ); + if (finalDeliverySnap !== null) deliverySnapshots.push(finalDeliverySnap); + } + + return { + testId, + scenario, + startedAt, + completedAt: new Date().toISOString(), + phases: phaseResults, + metrics: snapshots, + deliveryMetrics: deliverySnapshots, + }; +} diff --git a/lambdas/perf-runner-lambda/src/scenario.ts b/lambdas/perf-runner-lambda/src/scenario.ts new file mode 100644 index 00000000..30c7cf72 --- /dev/null +++ b/lambdas/perf-runner-lambda/src/scenario.ts @@ -0,0 +1,82 @@ +import type { Scenario } from "types"; + +export const DEFAULT_SCENARIO: Scenario = { + phases: [ + { durationSecs: 15, targetEps: 1000 }, + { durationSecs: 15, targetEps: 2000 }, + { durationSecs: 30, targetEps: 3000 }, + ], + eventMix: [ + // perf-client-1: all message statuses → all subscription paths exercised + { + weight: 4, + factory: "messageStatus", + clientId: "perf-client-1", + messageStatus: "DELIVERED", + }, + { + weight: 2, + factory: "messageStatus", + clientId: "perf-client-1", + messageStatus: "FAILED", + }, + { + weight: 1, + factory: "messageStatus", + clientId: "perf-client-1", + messageStatus: 
"SENDING", + }, + { + weight: 1, + factory: "messageStatus", + clientId: "perf-client-1", + messageStatus: "PENDING_ENRICHMENT", + }, + // perf-client-2: channel status events + { + weight: 3, + factory: "channelStatus", + clientId: "perf-client-2", + channelStatus: "DELIVERED", + }, + { + weight: 1, + factory: "channelStatus", + clientId: "perf-client-2", + channelStatus: "FAILED", + }, + { + weight: 1, + factory: "channelStatus", + clientId: "perf-client-2", + channelStatus: "RETRY", + }, + // perf-client-3: DELIVERED matches (fan-out to 2 targets); SENDING is filtered + { + weight: 2, + factory: "messageStatus", + clientId: "perf-client-3", + messageStatus: "DELIVERED", + }, + { + weight: 1, + factory: "messageStatus", + clientId: "perf-client-3", + messageStatus: "SENDING", + }, + // perf-client-4: mixed message + channel status + { + weight: 2, + factory: "messageStatus", + clientId: "perf-client-4", + messageStatus: "DELIVERED", + }, + { + weight: 1, + factory: "channelStatus", + clientId: "perf-client-4", + channelStatus: "DELIVERED", + }, + ], + metricsIntervalSecs: 15, +}; diff --git a/lambdas/perf-runner-lambda/src/sqs.ts b/lambdas/perf-runner-lambda/src/sqs.ts new file mode 100644 index 00000000..154ce2e3 --- /dev/null +++ b/lambdas/perf-runner-lambda/src/sqs.ts @@ -0,0 +1,78 @@ +import { type SQSClient, SendMessageBatchCommand } from "@aws-sdk/client-sqs"; +import type { StatusPublishEvent } from "@nhs-notify-client-callbacks/models"; +import type { EventMixEntry, Phase, PhaseResult } from "types"; +import { createEvent } from "event-factories"; + +const SQS_MAX_BATCH_SIZE = 10; + +export function selectWeighted(entries: T[]): T { + const totalWeight = entries.reduce((sum, entry) => sum + entry.weight, 0); + // eslint-disable-next-line sonarjs/pseudo-random -- weighted selection for load test event distribution + let remaining = Math.random() * totalWeight; + + for (const entry of entries.slice(0, -1)) { + remaining -= entry.weight; + if (remaining <= 
0) return entry; + } + + // Safe: selectWeighted is only called with non-empty arrays + return entries.at(-1)!; +} + +export async function sendSqsBatch( + client: SQSClient, + queueUrl: string, + events: StatusPublishEvent[], +): Promise { + await client.send( + new SendMessageBatchCommand({ + QueueUrl: queueUrl, + Entries: events.map((event, index) => ({ + Id: String(index), + MessageBody: JSON.stringify(event), + })), + }), + ); +} + +export async function generatePhaseLoad( + client: SQSClient, + queueUrl: string, + phase: Phase, + eventMix: EventMixEntry[], +): Promise { + const batchesPerSecond = Math.ceil(phase.targetEps / SQS_MAX_BATCH_SIZE); + const start = Date.now(); + let sent = 0; + + for (let second = 0; second < phase.durationSecs; second++) { + const waveStart = Date.now(); + + const batchResults = await Promise.all( + Array.from({ length: batchesPerSecond }, () => { + const batch = Array.from({ length: SQS_MAX_BATCH_SIZE }, () => + createEvent(selectWeighted(eventMix)), + ); + return sendSqsBatch(client, queueUrl, batch).then(() => batch.length); + }), + ); + + sent += batchResults.reduce((sum, count) => sum + count, 0); + + const remaining = 1000 - (Date.now() - waveStart); + if (remaining > 0 && second < phase.durationSecs - 1) { + await new Promise((resolve) => { + setTimeout(resolve, remaining); + }); + } + } + + const durationMs = Date.now() - start; + + return { + targetEps: phase.targetEps, + achievedEps: Math.round(sent / (durationMs / 1000)), + sent, + durationMs, + }; +} diff --git a/lambdas/perf-runner-lambda/src/types.ts b/lambdas/perf-runner-lambda/src/types.ts new file mode 100644 index 00000000..5366602d --- /dev/null +++ b/lambdas/perf-runner-lambda/src/types.ts @@ -0,0 +1,79 @@ +import type { SQSClient } from "@aws-sdk/client-sqs"; +import type { CloudWatchLogsClient } from "@aws-sdk/client-cloudwatch-logs"; +import type { + ChannelStatus, + MessageStatus, +} from "@nhs-notify-client-callbacks/models"; + +export type 
MessageStatusMixEntry = { + weight: number; + factory: "messageStatus"; + clientId: string; + messageStatus: MessageStatus; +}; + +export type ChannelStatusMixEntry = { + weight: number; + factory: "channelStatus"; + clientId: string; + channelStatus: ChannelStatus; +}; + +export type EventMixEntry = MessageStatusMixEntry | ChannelStatusMixEntry; + +export type Phase = { + durationSecs: number; + targetEps: number; +}; + +export type Scenario = { + phases: Phase[]; + eventMix: EventMixEntry[]; + metricsIntervalSecs: number; +}; + +export type PhaseResult = { + targetEps: number; + achievedEps: number; + sent: number; + durationMs: number; +}; + +export type MetricsSnapshot = { + snapshotAt: number; + p50Ms: number; + p95Ms: number; + p99Ms: number; + count: number; +}; + +export type DeliveryMetricsSnapshot = { + snapshotAt: number; + deliveryCount: number; + p50Ms: number; + p95Ms: number; + p99Ms: number; +}; + +export type PerformanceResult = { + testId: string; + scenario: Scenario; + startedAt: string; + completedAt: string; + phases: PhaseResult[]; + metrics: MetricsSnapshot[]; + deliveryMetrics: DeliveryMetricsSnapshot[]; +}; + +export type PerfRunnerPayload = { + testId: string; + scenario?: Scenario; +}; + +export type RunnerDeps = { + sqsClient: SQSClient; + cloudWatchClient: CloudWatchLogsClient; + queueUrl: string; + logGroupName: string; + deliveryLogGroupPrefix?: string; +}; diff --git a/tests/performance/tsconfig.json b/lambdas/perf-runner-lambda/tsconfig.json similarity index 60% rename from tests/performance/tsconfig.json rename to lambdas/perf-runner-lambda/tsconfig.json index 2cc7bdfa..a50e6fc0 100644 --- a/tests/performance/tsconfig.json +++ b/lambdas/perf-runner-lambda/tsconfig.json @@ -2,16 +2,13 @@ "compilerOptions": { "isolatedModules": true, "paths": { - "helpers": [ - "./helpers/index" + "*": [ + "./src/*" ] } }, - "exclude": [ - "jest.config.ts" - ], "extends": "../../tsconfig.base.json", "include": [ - "**/*.ts" + "src/**/*" ] } diff 
--git a/package.json b/package.json index fa6f2223..aeddf03e 100644 --- a/package.json +++ b/package.json @@ -32,15 +32,6 @@ "typescript-eslint": "catalog:lint" }, "name": "nhs-notify-client-callbacks", - "pnpm": { - "overrides": { - "collect-v8-coverage": "^1.0.3", - "pretty-format>react-is": "19.0.0", - "flatted": "^3.4.0", - "fast-xml-parser": "^5.5.6", - "ts-jest>handlebars": "^4.7.9" - } - }, "scripts": { "generate-dependencies": "pnpm -r run --if-present generate-dependencies || true", "lint": "pnpm -r run lint", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 10503d34..c497eafb 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -448,6 +448,49 @@ importers: specifier: catalog:tools version: 5.9.3 + lambdas/perf-runner-lambda: + dependencies: + '@aws-sdk/client-cloudwatch-logs': + specifier: catalog:aws + version: 3.1026.0 + '@aws-sdk/client-sqs': + specifier: catalog:aws + version: 3.1026.0 + '@nhs-notify-client-callbacks/logger': + specifier: workspace:* + version: link:../../src/logger + '@nhs-notify-client-callbacks/models': + specifier: workspace:* + version: link:../../src/models + esbuild: + specifier: catalog:tools + version: 0.28.0 + devDependencies: + '@tsconfig/node22': + specifier: catalog:tools + version: 22.0.5 + '@types/aws-lambda': + specifier: catalog:tools + version: 8.10.161 + '@types/jest': + specifier: catalog:test + version: 30.0.0 + '@types/node': + specifier: catalog:tools + version: 25.6.0 + eslint: + specifier: catalog:lint + version: 9.39.4(jiti@2.6.1) + jest: + specifier: catalog:test + version: 30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)) + ts-jest: + specifier: catalog:test + version: 29.4.9(@babel/core@7.29.0)(@jest/transform@30.3.0)(@jest/types@30.3.0)(babel-jest@30.3.0(@babel/core@7.29.0))(esbuild@0.28.0)(jest-util@30.3.0)(jest@30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)))(typescript@5.9.3) + typescript: + specifier: catalog:tools + version: 5.9.3 + 
src/config-cache: dependencies: '@nhs-notify-client-callbacks/models': @@ -609,43 +652,6 @@ importers: specifier: catalog:tools version: 5.9.3 - tests/performance: - dependencies: - '@aws-sdk/client-cloudwatch-logs': - specifier: catalog:aws - version: 3.1026.0 - '@aws-sdk/client-sqs': - specifier: catalog:aws - version: 3.1026.0 - '@nhs-notify-client-callbacks/models': - specifier: workspace:* - version: link:../../src/models - '@nhs-notify-client-callbacks/test-support': - specifier: workspace:* - version: link:../test-support - async-wait-until: - specifier: catalog:app - version: 2.0.31 - devDependencies: - '@tsconfig/node22': - specifier: catalog:tools - version: 22.0.5 - '@types/jest': - specifier: catalog:test - version: 30.0.0 - '@types/node': - specifier: catalog:tools - version: 25.6.0 - eslint: - specifier: catalog:lint - version: 9.39.4(jiti@2.6.1) - jest: - specifier: catalog:test - version: 30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)) - typescript: - specifier: catalog:tools - version: 5.9.3 - tests/test-support: dependencies: '@aws-sdk/client-cloudwatch-logs': diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 90671764..c343e4f9 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -6,6 +6,13 @@ packages: - "tools/*" blockExoticSubdeps: true +overrides: + collect-v8-coverage: "^1.0.3" + "pretty-format>react-is": "19.0.0" + flatted: "^3.4.0" + fast-xml-parser: "^5.5.6" + "ts-jest>handlebars": "^4.7.9" + catalogs: app: "@redis/client": "^1.5.14" diff --git a/tests/performance/README.md b/tests/performance/README.md deleted file mode 100644 index 552dda01..00000000 --- a/tests/performance/README.md +++ /dev/null @@ -1,32 +0,0 @@ -# performance - -Load tests for the client-callbacks service. These tests run against a real deployed AWS environment — they are not unit tests and cannot run locally without a live stack. 
- -## Prerequisites - -- AWS credentials configured for the target environment -- The service deployed to the target environment - -## Environment Variables - -| Variable | Required | Default | Description | -| --- | --- | --- | --- | -| `ENVIRONMENT` | Yes | — | Target environment name (e.g. `dev`) | -| `AWS_ACCOUNT_ID` | Yes | — | AWS account ID for the target environment | -| `AWS_REGION` | No | `eu-west-2` | AWS region | -| `PROJECT` | No | `nhs` | Project name prefix used in resource naming | -| `COMPONENT` | No | `callbacks` | Component name used in resource naming | - -## Running - -From the repository root: - -```bash -ENVIRONMENT=dev AWS_ACCOUNT_ID=123456789012 pnpm run test:performance --filter tests/performance -``` - -## What the Tests Do - -The load test sends ~3,000 events/s to the SQS inbound queue for 30 seconds, then reads CloudWatch Logs to assert that the p95 Lambda processing time is below 500ms. - -The global teardown removes the test client subscription config from S3. 
diff --git a/tests/performance/fixtures/subscriptions/perf-client-1.json b/tests/performance/fixtures/subscriptions/perf-client-1.json new file mode 100644 index 00000000..1c730b8a --- /dev/null +++ b/tests/performance/fixtures/subscriptions/perf-client-1.json @@ -0,0 +1,44 @@ +{ + "clientId": "perf-client-1", + "subscriptions": [ + { + "messageStatuses": [ + "DELIVERED", + "FAILED", + "SENDING", + "PENDING_ENRICHMENT", + "ENRICHED" + ], + "subscriptionId": "sub-451afe55-2c8f-4103-a5f7-7bcf79e8e476", + "subscriptionType": "MessageStatus", + "targetIds": [ + "target-39dbd795-5909-40ab-95b2-4e88b11a2813" + ] + } + ], + "targets": [ + { + "apiKey": { + "headerName": "x-api-key", + "headerValue": "REPLACED_BY_TERRAFORM" + }, + "delivery": { + "circuitBreaker": { + "enabled": true + }, + "maxRetryDurationSeconds": 7200, + "mtls": { + "certPinning": { + "enabled": false + }, + "enabled": false + } + }, + "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", + "invocationMethod": "POST", + "invocationRateLimit": 300, + "targetId": "target-39dbd795-5909-40ab-95b2-4e88b11a2813", + "type": "API" + } + ] +} diff --git a/tests/performance/fixtures/subscriptions/perf-client-2.json b/tests/performance/fixtures/subscriptions/perf-client-2.json new file mode 100644 index 00000000..d3c58a93 --- /dev/null +++ b/tests/performance/fixtures/subscriptions/perf-client-2.json @@ -0,0 +1,54 @@ +{ + "clientId": "perf-client-2", + "subscriptions": [ + { + "channelStatuses": [ + "DELIVERED", + "FAILED", + "RETRY", + "SKIPPED", + "SENDING", + "CREATED" + ], + "channelType": "NHSAPP", + "subscriptionId": "sub-ace58855-9f6b-4491-8cee-abb99d997ced", + "subscriptionType": "ChannelStatus", + "supplierStatuses": [ + "delivered", + "permanent_failure", + "temporary_failure", + "pending", + "sending", + "sent" + ], + "targetIds": [ + "target-e3ccc2c2-7b19-4475-80d5-51a1182d239a" + ] + } + ], + "targets": [ + { + "apiKey": { + "headerName": "x-api-key", + "headerValue": "REPLACED_BY_TERRAFORM" + }, + 
"delivery": { + "circuitBreaker": { + "enabled": true + }, + "maxRetryDurationSeconds": 7200, + "mtls": { + "certPinning": { + "enabled": false + }, + "enabled": false + } + }, + "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", + "invocationMethod": "POST", + "invocationRateLimit": 300, + "targetId": "target-e3ccc2c2-7b19-4475-80d5-51a1182d239a", + "type": "API" + } + ] +} diff --git a/tests/performance/fixtures/subscriptions/perf-client-3.json b/tests/performance/fixtures/subscriptions/perf-client-3.json new file mode 100644 index 00000000..8034177b --- /dev/null +++ b/tests/performance/fixtures/subscriptions/perf-client-3.json @@ -0,0 +1,64 @@ +{ + "clientId": "perf-client-3", + "subscriptions": [ + { + "messageStatuses": [ + "DELIVERED" + ], + "subscriptionId": "sub-72197a52-8f4a-4b9d-b074-90f51183b91c", + "subscriptionType": "MessageStatus", + "targetIds": [ + "target-9f81befc-8cd2-49d7-9972-40b11c932d80", + "target-42228749-1610-4862-bcbf-ffb5b3a6f7eb" + ] + } + ], + "targets": [ + { + "apiKey": { + "headerName": "x-api-key", + "headerValue": "REPLACED_BY_TERRAFORM" + }, + "delivery": { + "circuitBreaker": { + "enabled": true + }, + "maxRetryDurationSeconds": 7200, + "mtls": { + "certPinning": { + "enabled": false + }, + "enabled": false + } + }, + "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", + "invocationMethod": "POST", + "invocationRateLimit": 300, + "targetId": "target-9f81befc-8cd2-49d7-9972-40b11c932d80", + "type": "API" + }, + { + "apiKey": { + "headerName": "x-api-key", + "headerValue": "REPLACED_BY_TERRAFORM" + }, + "delivery": { + "circuitBreaker": { + "enabled": true + }, + "maxRetryDurationSeconds": 7200, + "mtls": { + "certPinning": { + "enabled": false + }, + "enabled": false + } + }, + "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", + "invocationMethod": "POST", + "invocationRateLimit": 300, + "targetId": "target-42228749-1610-4862-bcbf-ffb5b3a6f7eb", + "type": "API" + } + ] +} diff --git 
a/tests/performance/fixtures/subscriptions/perf-client-4.json b/tests/performance/fixtures/subscriptions/perf-client-4.json new file mode 100644 index 00000000..b6c72346 --- /dev/null +++ b/tests/performance/fixtures/subscriptions/perf-client-4.json @@ -0,0 +1,57 @@ +{ + "clientId": "perf-client-4", + "subscriptions": [ + { + "messageStatuses": [ + "DELIVERED", + "FAILED" + ], + "subscriptionId": "sub-31908329-f6ce-4655-94a0-1ceb42073f13", + "subscriptionType": "MessageStatus", + "targetIds": [ + "target-11c2d19e-e8c9-4058-8175-546eabd1def2" + ] + }, + { + "channelStatuses": [ + "DELIVERED", + "FAILED" + ], + "channelType": "NHSAPP", + "subscriptionId": "sub-4f8a6b2c-d193-47e5-b860-7a9f3c1d2e4b", + "subscriptionType": "ChannelStatus", + "supplierStatuses": [ + "delivered", + "permanent_failure" + ], + "targetIds": [ + "target-11c2d19e-e8c9-4058-8175-546eabd1def2" + ] + } + ], + "targets": [ + { + "apiKey": { + "headerName": "x-api-key", + "headerValue": "REPLACED_BY_TERRAFORM" + }, + "delivery": { + "circuitBreaker": { + "enabled": true + }, + "maxRetryDurationSeconds": 7200, + "mtls": { + "certPinning": { + "enabled": false + }, + "enabled": false + } + }, + "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", + "invocationMethod": "POST", + "invocationRateLimit": 300, + "targetId": "target-11c2d19e-e8c9-4058-8175-546eabd1def2", + "type": "API" + } + ] +} diff --git a/tests/performance/helpers/cloudwatch.ts b/tests/performance/helpers/cloudwatch.ts deleted file mode 100644 index 33772ba6..00000000 --- a/tests/performance/helpers/cloudwatch.ts +++ /dev/null @@ -1,156 +0,0 @@ -import { - CloudWatchLogsClient, - FilterLogEventsCommand, - GetQueryResultsCommand, - StartQueryCommand, -} from "@aws-sdk/client-cloudwatch-logs"; -import { waitUntil } from "async-wait-until"; - -const POLL_INTERVAL_MS = 2000; -const COLLECT_TIMEOUT_MS = 120_000; - -type BatchCompletedLogEntry = { - processingTimeMs: number; - batchSize: number; - successful: number; - failed: number; - 
filtered: number; -}; - -export async function collectBatchProcessingTimes( - client: CloudWatchLogsClient, - logGroupName: string, - expectedCount: number, - startTime: number, -): Promise { - const collected: number[] = []; - - await waitUntil( - async () => { - const response = await client.send( - new FilterLogEventsCommand({ - logGroupName, - startTime, - filterPattern: '{ $.msg = "batch-processing-completed" }', - }), - ); - - for (const event of response.events ?? []) { - if (event.message) { - try { - const entry = JSON.parse(event.message) as BatchCompletedLogEntry; - if (typeof entry.processingTimeMs === "number") { - collected.push(entry.processingTimeMs); - } - } catch { - // skip unparseable entries - } - } - } - - return collected.length >= expectedCount; - }, - { timeout: COLLECT_TIMEOUT_MS, intervalBetweenAttempts: POLL_INTERVAL_MS }, - ); - - return collected; -} - -export function computePercentile( - samples: number[], - percentile: number, -): number { - if (samples.length === 0) { - throw new Error("Cannot compute percentile of empty array"); - } - - const sorted = [...samples].toSorted((a, b) => a - b); - const index = Math.ceil((percentile / 100) * sorted.length) - 1; - return sorted[Math.max(0, index)]; -} - -const INSIGHTS_QUERY_TIMEOUT_MS = 60_000; -const INSIGHTS_COLLECT_TIMEOUT_MS = 300_000; - -async function runInsightsQuery( - client: CloudWatchLogsClient, - logGroupName: string, - startTimeSec: number, - endTimeSec: number, - percentile: number, -): Promise<{ count: number; percentileMs: number } | null> { - const { queryId } = await client.send( - new StartQueryCommand({ - logGroupName, - startTime: startTimeSec, - endTime: endTimeSec, - queryString: [ - 'filter msg = "batch-processing-completed"', - `| stats count(*) as eventCount, pct(processingTimeMs, ${percentile}) as p`, - ].join("\n"), - }), - ); - - if (!queryId) return null; - - const deadline = Date.now() + INSIGHTS_QUERY_TIMEOUT_MS; - - while (Date.now() < deadline) { - 
await new Promise((resolve) => { - setTimeout(resolve, 2000); - }); - - const response = await client.send(new GetQueryResultsCommand({ queryId })); - - if (response.status === "Failed" || response.status === "Cancelled") { - return null; - } - - if (response.status === "Complete") { - const row = response.results?.[0]; - if (!row) return null; - - return { - count: Number(row.find((f) => f.field === "eventCount")?.value ?? 0), - percentileMs: Number(row.find((f) => f.field === "p")?.value ?? 0), - }; - } - } - - return null; -} - -export async function waitForBatchProcessingPercentile( - client: CloudWatchLogsClient, - logGroupName: string, - testStartTime: number, - expectedCount: number, - percentile: number, -): Promise<{ count: number; percentileMs: number }> { - const startTimeSec = Math.floor(testStartTime / 1000); - let result = { count: 0, percentileMs: 0 }; - - await waitUntil( - async () => { - const endTimeSec = Math.floor((Date.now() + 60_000) / 1000); - const queryResult = await runInsightsQuery( - client, - logGroupName, - startTimeSec, - endTimeSec, - percentile, - ); - - if (!queryResult) return false; - - result = queryResult; - return result.count >= expectedCount; - }, - { - timeout: INSIGHTS_COLLECT_TIMEOUT_MS, - intervalBetweenAttempts: POLL_INTERVAL_MS, - }, - ); - - return result; -} diff --git a/tests/performance/helpers/deployment.ts b/tests/performance/helpers/deployment.ts deleted file mode 100644 index 5d6ee82e..00000000 --- a/tests/performance/helpers/deployment.ts +++ /dev/null @@ -1,10 +0,0 @@ -import { - type DeploymentDetails, - buildLambdaLogGroupName, -} from "@nhs-notify-client-callbacks/test-support/helpers/deployment"; - -export function buildTransformFilterLambdaLogGroupName( - details: DeploymentDetails, -): string { - return buildLambdaLogGroupName(details, "client-transform-filter"); -} diff --git a/tests/performance/helpers/index.ts b/tests/performance/helpers/index.ts deleted file mode 100644 index 194022a3..00000000 --- 
a/tests/performance/helpers/index.ts +++ /dev/null @@ -1,4 +0,0 @@ -export * from "./cloudwatch"; -export * from "./deployment"; -export * from "./event-factories"; -export * from "./sqs"; diff --git a/tests/performance/helpers/sqs.ts b/tests/performance/helpers/sqs.ts deleted file mode 100644 index e8d5b171..00000000 --- a/tests/performance/helpers/sqs.ts +++ /dev/null @@ -1,72 +0,0 @@ -import { - SQSClient, - SendMessageBatchCommand, - SendMessageCommand, -} from "@aws-sdk/client-sqs"; -import type { StatusPublishEvent } from "@nhs-notify-client-callbacks/models"; - -export async function sendSqsEvent( - client: SQSClient, - queueUrl: string, - event: StatusPublishEvent, -): Promise { - await client.send( - new SendMessageCommand({ - QueueUrl: queueUrl, - MessageBody: JSON.stringify(event), - }), - ); -} - -const SQS_MAX_BATCH_SIZE = 10; - -export async function sendSqsBatch( - client: SQSClient, - queueUrl: string, - events: StatusPublishEvent[], -): Promise { - await client.send( - new SendMessageBatchCommand({ - QueueUrl: queueUrl, - Entries: events.map((event, index) => ({ - Id: String(index), - MessageBody: JSON.stringify(event), - })), - }), - ); -} - -export async function generateSqsLoad( - client: SQSClient, - queueUrl: string, - targetEventsPerSecond: number, - durationSeconds: number, - eventFactory: () => StatusPublishEvent, -): Promise<{ sent: number; durationMs: number }> { - const batchesPerSecond = Math.ceil( - targetEventsPerSecond / SQS_MAX_BATCH_SIZE, - ); - const start = Date.now(); - let sent = 0; - - for (let second = 0; second < durationSeconds; second++) { - const waveStart = Date.now(); - - const results = await Promise.all( - Array.from({ length: batchesPerSecond }, () => { - const batch = Array.from({ length: SQS_MAX_BATCH_SIZE }, eventFactory); - return sendSqsBatch(client, queueUrl, batch).then(() => batch.length); - }), - ); - sent += results.reduce((sum, count) => sum + count, 0); - - const remaining = 1000 - (Date.now() - 
waveStart); - if (remaining > 0 && second < durationSeconds - 1) { - await new Promise((resolve) => { - setTimeout(resolve, remaining); - }); - } - } - - return { sent, durationMs: Date.now() - start }; -} diff --git a/tests/performance/jest.config.ts b/tests/performance/jest.config.ts deleted file mode 100644 index 06f45e6d..00000000 --- a/tests/performance/jest.config.ts +++ /dev/null @@ -1,14 +0,0 @@ -import { nodeJestConfig } from "../../jest.config.base.ts"; - -export default { - ...nodeJestConfig, - modulePaths: [""], - collectCoverage: false, - moduleNameMapper: { - "^helpers$": "/helpers/index", - }, - // Run performance tests serially to avoid queue contention - maxWorkers: 1, - // Force exit after tests complete — real AWS SDK clients keep connections alive - forceExit: true, -}; diff --git a/tests/performance/lambda-throughput.test.ts b/tests/performance/lambda-throughput.test.ts deleted file mode 100644 index 5a543ab6..00000000 --- a/tests/performance/lambda-throughput.test.ts +++ /dev/null @@ -1,76 +0,0 @@ -import { CloudWatchLogsClient } from "@aws-sdk/client-cloudwatch-logs"; -import { SQSClient } from "@aws-sdk/client-sqs"; -import { - buildInboundEventQueueUrl, - createCloudWatchLogsClient, - createSqsClient, - getDeploymentDetails, -} from "@nhs-notify-client-callbacks/test-support/helpers"; -import { - buildTransformFilterLambdaLogGroupName, - createMessageStatusPublishEvent, - generateSqsLoad, - waitForBatchProcessingPercentile, -} from "helpers"; - -const TARGET_EPS = 3000; -const LOAD_DURATION_SECONDS = 30; -const P95_LATENCY_THRESHOLD_MS = 500; - -describe("Lambda throughput and latency under load", () => { - let sqsClient: SQSClient; - let cloudWatchClient: CloudWatchLogsClient; - let inboundQueueUrl: string; - let lambdaLogGroupName: string; - - beforeAll(() => { - const deploymentDetails = getDeploymentDetails(); - - sqsClient = createSqsClient(deploymentDetails); - cloudWatchClient = createCloudWatchLogsClient(deploymentDetails); - 
inboundQueueUrl = buildInboundEventQueueUrl(deploymentDetails); - lambdaLogGroupName = - buildTransformFilterLambdaLogGroupName(deploymentDetails); - }); - - afterAll(() => { - sqsClient.destroy(); - cloudWatchClient.destroy(); - }); - - it(`should sustain ~${TARGET_EPS} events/s for ${LOAD_DURATION_SECONDS}s with p95 Lambda processing time below ${P95_LATENCY_THRESHOLD_MS}ms`, async () => { - const testStartTime = Date.now(); - - const { durationMs, sent } = await generateSqsLoad( - sqsClient, - inboundQueueUrl, - TARGET_EPS, - LOAD_DURATION_SECONDS, - createMessageStatusPublishEvent, - ); - - const achievedEps = Math.round(sent / (durationMs / 1000)); - console.log( - `Load generation: ${sent} events in ${durationMs}ms (${achievedEps} eps achieved)`, - ); - - // Accept ≥90% of sent events processed — accounts for any events routed to DLQ - // due to transient Lambda errors under concurrency pressure. - const minExpectedCount = Math.floor(sent * 0.9); - - const { count, percentileMs } = await waitForBatchProcessingPercentile( - cloudWatchClient, - lambdaLogGroupName, - testStartTime, - minExpectedCount, - 95, - ); - - console.log( - `Processing: ${count} events logged, p95 Lambda processing time: ${percentileMs}ms`, - ); - - expect(count).toBeGreaterThanOrEqual(minExpectedCount); - expect(percentileMs).toBeLessThan(P95_LATENCY_THRESHOLD_MS); - }, 600_000); -}); diff --git a/tools/client-subscriptions-management/src/repository/client-subscriptions.ts b/tools/client-subscriptions-management/src/repository/client-subscriptions.ts index 4a744fc3..04fc266b 100644 --- a/tools/client-subscriptions-management/src/repository/client-subscriptions.ts +++ b/tools/client-subscriptions-management/src/repository/client-subscriptions.ts @@ -131,7 +131,7 @@ export class ClientSubscriptionRepository { const updated: ClientSubscriptionConfiguration = { ...config, subscriptions: config.subscriptions.map( - // eslint-disable-next-line sonarjs/function-return-type -- false positive: 
complex conditional spread returns are all SubscriptionConfiguration subtypes + // eslint-disable-next-line sonarjs/function-return-type (sub): SubscriptionConfiguration => { if (sub.subscriptionId !== subscriptionId) return sub; if (sub.subscriptionType === "MessageStatus") { From ebf9e81cb7f467800b98161da70cc657df6aaae7 Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Fri, 24 Apr 2026 15:42:55 +0100 Subject: [PATCH 27/65] CCM-16073 - ITs, metrics fix, log correlationId (#156) * Rename mock-client-1/2 and add mtls test * Retry window and exhaustion test * DLQ redrive delivery queue test * Metric test coverage * Rate limit tests * Fix: reset metrics per invocation * Fix new tests to use UUIDs as message ids rather than timestamps * Add correlation id to logging * Batch count logging * Circuit breaker test * Integration test tidy up refactor * Log receive count and sqsMessageId * Fix batch success count on DLQ * Log receive count for all messages --- .../__tests__/delivery-observability.test.ts | 73 +++- .../__tests__/fixtures/handler-fixtures.ts | 8 +- .../src/__tests__/handler.test.ts | 1 + lambdas/https-client-lambda/src/handler.ts | 79 ++-- .../src/services/delivery-observability.ts | 39 +- lambdas/mock-webhook-lambda/src/index.ts | 1 + scripts/tests/integration-env.sh | 8 +- tests/integration/delivery-resilience.test.ts | 254 +++++++++++++ tests/integration/dlq-redrive.test.ts | 230 ++++++------ ...client-2.json => mock-client-fan-out.json} | 2 +- .../mock-client-short-retry.json | 38 ++ ...-1.json => mock-client-single-target.json} | 2 +- tests/integration/helpers/cloudwatch.ts | 353 +++++++++--------- tests/integration/helpers/event-factories.ts | 32 -- .../integration/helpers/mock-client-config.ts | 39 +- tests/integration/helpers/sqs.ts | 29 +- tests/integration/helpers/status-events.ts | 89 ----- tests/integration/helpers/test-context.ts | 52 +++ .../inbound-sqs-to-webhook.test.ts | 272 ++++++-------- tests/integration/metrics.test.ts | 202 +++++----- 20 
files changed, 1085 insertions(+), 718 deletions(-) create mode 100644 tests/integration/delivery-resilience.test.ts rename tests/integration/fixtures/subscriptions/{mock-client-2.json => mock-client-fan-out.json} (97%) create mode 100644 tests/integration/fixtures/subscriptions/mock-client-short-retry.json rename tests/integration/fixtures/subscriptions/{mock-client-1.json => mock-client-single-target.json} (96%) delete mode 100644 tests/integration/helpers/status-events.ts create mode 100644 tests/integration/helpers/test-context.ts diff --git a/lambdas/https-client-lambda/src/__tests__/delivery-observability.test.ts b/lambdas/https-client-lambda/src/__tests__/delivery-observability.test.ts index c4c3ab5f..25e164a9 100644 --- a/lambdas/https-client-lambda/src/__tests__/delivery-observability.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/delivery-observability.test.ts @@ -39,12 +39,16 @@ describe("delivery-observability", () => { ); const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); - recordDeliveryAttempt("client-1", "target-1"); + recordDeliveryAttempt("client-1", "target-1", "msg-123"); expect(emitDeliveryAttempt).toHaveBeenCalledWith("target-1"); expect(logger.info).toHaveBeenCalledWith( "Attempting delivery", - expect.objectContaining({ clientId: "client-1", targetId: "target-1" }), + expect.objectContaining({ + clientId: "client-1", + targetId: "target-1", + correlationId: "msg-123", + }), ); }); @@ -54,12 +58,16 @@ describe("delivery-observability", () => { ); const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); - recordDeliverySuccess("client-1", "target-1"); + recordDeliverySuccess("client-1", "target-1", "msg-123"); expect(emitDeliverySuccess).toHaveBeenCalledWith("target-1"); expect(logger.info).toHaveBeenCalledWith( "Delivery succeeded", - expect.objectContaining({ clientId: "client-1", targetId: "target-1" }), + expect.objectContaining({ + clientId: "client-1", + targetId: "target-1", + 
correlationId: "msg-123", + }), ); }); @@ -69,12 +77,22 @@ describe("delivery-observability", () => { ); const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); - recordDeliveryPermanentFailure("client-1", "target-1"); + recordDeliveryPermanentFailure( + "client-1", + "target-1", + undefined, + undefined, + "msg-123", + ); expect(emitDeliveryPermanentFailure).toHaveBeenCalledWith("target-1"); expect(logger.warn).toHaveBeenCalledWith( - "Permanent delivery failure — sending to DLQ", - expect.objectContaining({ clientId: "client-1", targetId: "target-1" }), + "Permanent delivery failure \u2014 sending to DLQ", + expect.objectContaining({ + clientId: "client-1", + targetId: "target-1", + correlationId: "msg-123", + }), ); }); @@ -82,12 +100,16 @@ describe("delivery-observability", () => { const { emitRateLimited } = jest.requireMock("services/delivery-metrics"); const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); - recordDeliveryRateLimited("client-1", "target-1"); + recordDeliveryRateLimited("client-1", "target-1", "msg-123"); expect(emitRateLimited).toHaveBeenCalledWith("target-1"); expect(logger.info).toHaveBeenCalledWith( "Rate limited (429)", - expect.objectContaining({ clientId: "client-1", targetId: "target-1" }), + expect.objectContaining({ + clientId: "client-1", + targetId: "target-1", + correlationId: "msg-123", + }), ); }); @@ -97,16 +119,18 @@ describe("delivery-observability", () => { ); const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); - recordDeliveryFailure("client-1", "target-1", 503, 30); + recordDeliveryFailure("client-1", "target-1", 503, 30, 3, "msg-123"); expect(emitDeliveryFailure).toHaveBeenCalledWith("target-1"); expect(logger.warn).toHaveBeenCalledWith( - "Transient delivery failure — requeuing", + "Transient delivery failure \u2014 requeuing", expect.objectContaining({ clientId: "client-1", targetId: "target-1", + correlationId: "msg-123", statusCode: 503, backoffSec: 30, 
+ receiveCount: 3, }), ); }); @@ -117,12 +141,15 @@ describe("delivery-observability", () => { ); const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); - recordCircuitBreakerOpen("target-1"); + recordCircuitBreakerOpen("target-1", "msg-123"); expect(emitCircuitBreakerOpen).toHaveBeenCalledWith("target-1"); expect(logger.warn).toHaveBeenCalledWith( "Circuit breaker opened", - expect.objectContaining({ targetId: "target-1" }), + expect.objectContaining({ + targetId: "target-1", + correlationId: "msg-123", + }), ); }); @@ -132,12 +159,15 @@ describe("delivery-observability", () => { ); const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); - recordCircuitBreakerClosed("target-1"); + recordCircuitBreakerClosed("target-1", "msg-123"); expect(emitCircuitBreakerClosed).toHaveBeenCalledWith("target-1"); expect(logger.info).toHaveBeenCalledWith( "Circuit breaker closed", - expect.objectContaining({ targetId: "target-1" }), + expect.objectContaining({ + targetId: "target-1", + correlationId: "msg-123", + }), ); }); @@ -147,12 +177,16 @@ describe("delivery-observability", () => { ); const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); - recordRetryWindowExhausted("client-1", "target-1"); + recordRetryWindowExhausted("client-1", "target-1", "msg-123"); expect(emitRetryWindowExhausted).toHaveBeenCalledWith("target-1"); expect(logger.warn).toHaveBeenCalledWith( - "Retry window exhausted — sending to DLQ", - expect.objectContaining({ clientId: "client-1", targetId: "target-1" }), + "Retry window exhausted \u2014 sending to DLQ", + expect.objectContaining({ + clientId: "client-1", + targetId: "target-1", + correlationId: "msg-123", + }), ); }); @@ -162,7 +196,7 @@ describe("delivery-observability", () => { ); const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); - recordAdmissionDenied("client-1", "target-1", "rate_limited"); + recordAdmissionDenied("client-1", "target-1", "rate_limited", 
"msg-123"); expect(emitAdmissionDenied).toHaveBeenCalledWith( "target-1", @@ -173,6 +207,7 @@ describe("delivery-observability", () => { expect.objectContaining({ clientId: "client-1", targetId: "target-1", + correlationId: "msg-123", reason: "rate_limited", }), ); diff --git a/lambdas/https-client-lambda/src/__tests__/fixtures/handler-fixtures.ts b/lambdas/https-client-lambda/src/__tests__/fixtures/handler-fixtures.ts index 3a93b63f..731d478a 100644 --- a/lambdas/https-client-lambda/src/__tests__/fixtures/handler-fixtures.ts +++ b/lambdas/https-client-lambda/src/__tests__/fixtures/handler-fixtures.ts @@ -18,7 +18,13 @@ export const makeRecord = (overrides: Partial = {}): SQSRecord => ({ body: JSON.stringify({ payload: { data: [ - { type: "MessageStatus", attributes: { messageStatus: "delivered" } }, + { + type: "MessageStatus", + attributes: { + messageId: "test-message-id", + messageStatus: "delivered", + }, + }, ], }, subscriptionId: "sub-1", diff --git a/lambdas/https-client-lambda/src/__tests__/handler.test.ts b/lambdas/https-client-lambda/src/__tests__/handler.test.ts index f6cbdb68..3b8ad521 100644 --- a/lambdas/https-client-lambda/src/__tests__/handler.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/handler.test.ts @@ -86,6 +86,7 @@ jest.mock("services/delivery-metrics", () => ({ emitRateLimited: jest.fn(), emitRetryWindowExhausted: jest.fn(), flushMetrics: jest.fn().mockResolvedValue(undefined), + resetMetrics: jest.fn(), })); process.env.CLIENT_ID = "client-1"; diff --git a/lambdas/https-client-lambda/src/handler.ts b/lambdas/https-client-lambda/src/handler.ts index 48ee53cf..28fcc6b9 100644 --- a/lambdas/https-client-lambda/src/handler.ts +++ b/lambdas/https-client-lambda/src/handler.ts @@ -39,7 +39,7 @@ import { recordDeliverySuccess, recordRetryWindowExhausted, } from "services/delivery-observability"; -import { flushMetrics } from "services/delivery-metrics"; +import { flushMetrics, resetMetrics } from "services/delivery-metrics"; type 
RedisClientType = Awaited>; @@ -69,6 +69,7 @@ async function checkAdmission( cbEnabled: boolean, clientId: string, record: SQSRecord, + correlationId?: string, ): Promise { const gateResult = await admit( redis, @@ -80,12 +81,16 @@ async function checkAdmission( if (!gateResult.allowed) { const delaySec = Math.ceil(gateResult.retryAfterMs / 1000); - recordAdmissionDenied(clientId, targetId, gateResult.reason); + recordAdmissionDenied(clientId, targetId, gateResult.reason, correlationId); await changeVisibility(record.receiptHandle, delaySec); throw new VisibilityManagedError(`Admission denied: ${gateResult.reason}`); } } +const OUTCOME_DELIVERED = "delivered" as const; +const OUTCOME_DLQ = "dlq" as const; +type RecordOutcome = typeof OUTCOME_DELIVERED | typeof OUTCOME_DLQ; + async function handleDeliveryResult( result: DeliveryResult, record: SQSRecord, @@ -93,16 +98,17 @@ async function handleDeliveryResult( clientId: string, targetId: string, cbEnabled: boolean, -): Promise { + correlationId?: string, +): Promise { if (result.outcome === OUTCOME_SUCCESS) { if (cbEnabled) { const cbOutcome = await recordResult(redis, targetId, true, gateConfig); if (cbOutcome.ok && cbOutcome.state === "closed") { - recordCircuitBreakerClosed(targetId); + recordCircuitBreakerClosed(targetId, correlationId); } } - recordDeliverySuccess(clientId, targetId); - return; + recordDeliverySuccess(clientId, targetId, correlationId); + return OUTCOME_DELIVERED; } if (result.outcome === OUTCOME_PERMANENT_FAILURE) { @@ -111,14 +117,15 @@ async function handleDeliveryResult( targetId, result.statusCode, result.errorCode, + correlationId, ); await sendToDlq(record.body, result); - return; + return OUTCOME_DLQ; } if (result.outcome === OUTCOME_RATE_LIMITED) { const receiveCount = Number(record.attributes.ApproximateReceiveCount); - recordDeliveryRateLimited(clientId, targetId); + recordDeliveryRateLimited(clientId, targetId, correlationId); await handleRateLimitedRecord( record, clientId, @@ 
-126,7 +133,7 @@ async function handleDeliveryResult( result.retryAfterHeader, receiveCount, ); - return; + return OUTCOME_DELIVERED; // unreachable — handleRateLimitedRecord always throws } const receiveCount = Number(record.attributes.ApproximateReceiveCount); @@ -134,10 +141,17 @@ async function handleDeliveryResult( if (cbEnabled) { const cbOutcome = await recordResult(redis, targetId, false, gateConfig); if (cbOutcome.state === "opened") { - recordCircuitBreakerOpen(targetId); + recordCircuitBreakerOpen(targetId, correlationId); } } - recordDeliveryFailure(clientId, targetId, result.statusCode, backoffSec); + recordDeliveryFailure( + clientId, + targetId, + result.statusCode, + backoffSec, + receiveCount, + correlationId, + ); await changeVisibility(record.receiptHandle, backoffSec); throw new VisibilityManagedError(`Transient failure: ${result.statusCode}`); } @@ -145,7 +159,7 @@ async function handleDeliveryResult( async function processRecord( record: SQSRecord, redis: RedisClientType, -): Promise { +): Promise { const { CLIENT_ID } = process.env; if (!CLIENT_ID) { throw new Error("CLIENT_ID is required"); @@ -153,8 +167,15 @@ async function processRecord( const message: CallbackDeliveryMessage = JSON.parse(record.body); const { payload, targetId } = message; + const messageId = payload.data[0]?.attributes?.messageId; - logger.info("Processing delivery", { clientId: CLIENT_ID, targetId }); + logger.info("Processing delivery", { + clientId: CLIENT_ID, + targetId, + messageId, + sqsMessageId: record.messageId, + receiveCount: record.attributes.ApproximateReceiveCount, + }); const target = await loadTargetConfig(CLIENT_ID, targetId); const maxRetryDurationMs = @@ -167,9 +188,9 @@ async function processRecord( ); if (isWindowExhausted(firstReceivedMs, maxRetryDurationMs)) { - recordRetryWindowExhausted(CLIENT_ID, targetId); + recordRetryWindowExhausted(CLIENT_ID, targetId, messageId); await sendToDlq(record.body); - return; + return OUTCOME_DLQ; } const 
applicationId = await getApplicationId(CLIENT_ID); @@ -182,6 +203,7 @@ async function processRecord( cbEnabled, CLIENT_ID, record, + messageId, ); const agent = await buildAgent(target); @@ -192,24 +214,29 @@ async function processRecord( ); const payloadJson = JSON.stringify(payload); - recordDeliveryAttempt(CLIENT_ID, targetId); + recordDeliveryAttempt(CLIENT_ID, targetId, messageId); const deliveryStart = Date.now(); const result = await deliverPayload(target, payloadJson, signature, agent); recordDeliveryDuration(targetId, Date.now() - deliveryStart); - await handleDeliveryResult( + return handleDeliveryResult( result, record, redis, CLIENT_ID, targetId, cbEnabled, + messageId, ); } export async function processRecords( records: SQSRecord[], ): Promise { + resetMetrics(); + + logger.info("Batch received", { batchSize: records.length }); + const concurrencyLimit = Number( process.env.CONCURRENCY_LIMIT ?? String(DEFAULT_CONCURRENCY_LIMIT), ); @@ -218,10 +245,9 @@ export async function processRecords( const results = await pMap( records, - async (record): Promise => { + async (record): Promise => { try { - await processRecord(record, redis); - return null; + return await processRecord(record, redis); } catch (error) { if (!(error instanceof VisibilityManagedError)) { logger.error("Failed to process record", { @@ -243,5 +269,16 @@ export async function processRecords( ); await flushMetrics(); - return results.filter((r): r is SQSBatchItemFailure => r !== null); + const failures = results.filter( + (r): r is SQSBatchItemFailure => typeof r === "object", + ); + const deliveredCount = results.filter((r) => r === OUTCOME_DELIVERED).length; + const dlqCount = results.filter((r) => r === OUTCOME_DLQ).length; + logger.info("Batch complete", { + batchSize: records.length, + deliveredCount, + dlqCount, + failureCount: failures.length, + }); + return failures; } diff --git a/lambdas/https-client-lambda/src/services/delivery-observability.ts 
b/lambdas/https-client-lambda/src/services/delivery-observability.ts index 50dbb30e..ed41df8a 100644 --- a/lambdas/https-client-lambda/src/services/delivery-observability.ts +++ b/lambdas/https-client-lambda/src/services/delivery-observability.ts @@ -15,17 +15,19 @@ import { export function recordDeliveryAttempt( clientId: string, targetId: string, + correlationId?: string, ): void { emitDeliveryAttempt(targetId); - logger.info("Attempting delivery", { clientId, targetId }); + logger.info("Attempting delivery", { clientId, targetId, correlationId }); } export function recordDeliverySuccess( clientId: string, targetId: string, + correlationId?: string, ): void { emitDeliverySuccess(targetId); - logger.info("Delivery succeeded", { clientId, targetId }); + logger.info("Delivery succeeded", { clientId, targetId, correlationId }); } export function recordDeliveryPermanentFailure( @@ -33,11 +35,13 @@ export function recordDeliveryPermanentFailure( targetId: string, statusCode?: number, errorCode?: string, + correlationId?: string, ): void { emitDeliveryPermanentFailure(targetId); logger.warn("Permanent delivery failure — sending to DLQ", { clientId, targetId, + correlationId, ...(statusCode !== undefined && { statusCode }), ...(errorCode !== undefined && { errorCode }), }); @@ -46,9 +50,10 @@ export function recordDeliveryPermanentFailure( export function recordDeliveryRateLimited( clientId: string, targetId: string, + correlationId?: string, ): void { emitRateLimited(targetId); - logger.info("Rate limited (429)", { clientId, targetId }); + logger.info("Rate limited (429)", { clientId, targetId, correlationId }); } export function recordDeliveryFailure( @@ -56,34 +61,46 @@ export function recordDeliveryFailure( targetId: string, statusCode: number, backoffSec: number, + receiveCount: number, + correlationId?: string, ): void { emitDeliveryFailure(targetId); logger.warn("Transient delivery failure — requeuing", { clientId, targetId, + correlationId, statusCode, 
backoffSec, + receiveCount, }); } -export function recordCircuitBreakerOpen(targetId: string): void { +export function recordCircuitBreakerOpen( + targetId: string, + correlationId?: string, +): void { emitCircuitBreakerOpen(targetId); - logger.warn("Circuit breaker opened", { targetId }); + logger.warn("Circuit breaker opened", { targetId, correlationId }); } -export function recordCircuitBreakerClosed(targetId: string): void { +export function recordCircuitBreakerClosed( + targetId: string, + correlationId?: string, +): void { emitCircuitBreakerClosed(targetId); - logger.info("Circuit breaker closed", { targetId }); + logger.info("Circuit breaker closed", { targetId, correlationId }); } export function recordRetryWindowExhausted( clientId: string, targetId: string, + correlationId?: string, ): void { emitRetryWindowExhausted(targetId); logger.warn("Retry window exhausted — sending to DLQ", { clientId, targetId, + correlationId, }); } @@ -91,9 +108,15 @@ export function recordAdmissionDenied( clientId: string, targetId: string, reason: string, + correlationId?: string, ): void { emitAdmissionDenied(targetId, reason); - logger.warn("Admission denied", { clientId, targetId, reason }); + logger.warn("Admission denied", { + clientId, + targetId, + correlationId, + reason, + }); } export function recordDeliveryDuration( diff --git a/lambdas/mock-webhook-lambda/src/index.ts b/lambdas/mock-webhook-lambda/src/index.ts index 414f66ac..d0bf582d 100644 --- a/lambdas/mock-webhook-lambda/src/index.ts +++ b/lambdas/mock-webhook-lambda/src/index.ts @@ -175,6 +175,7 @@ async function buildResponse( messageId, callbackType: item.type, path, + isMtls, apiKey: providedApiKey, signature: headers["x-hmac-sha256-signature"] ?? 
"", payload: JSON.stringify(item), diff --git a/scripts/tests/integration-env.sh b/scripts/tests/integration-env.sh index cd5ff1a8..9a889902 100644 --- a/scripts/tests/integration-env.sh +++ b/scripts/tests/integration-env.sh @@ -7,8 +7,12 @@ set -euo pipefail # Add new clients here: "fixture-filename.json:ENV_VAR_PREFIX" CLIENTS=( - "mock-client-1.json:MOCK_CLIENT" - "mock-client-2.json:MOCK_CLIENT_2" + "mock-client-single-target.json:MOCK_CLIENT" + "mock-client-fan-out.json:MOCK_CLIENT_FAN_OUT" + "mock-client-mtls.json:MOCK_CLIENT_MTLS" + "mock-client-rate-limit.json:MOCK_CLIENT_RATE_LIMIT" + "mock-client-circuit-breaker.json:MOCK_CLIENT_CIRCUIT_BREAKER" + "mock-client-short-retry.json:MOCK_CLIENT_SHORT_RETRY" ) for CLIENT_ENTRY in "${CLIENTS[@]}"; do diff --git a/tests/integration/delivery-resilience.test.ts b/tests/integration/delivery-resilience.test.ts new file mode 100644 index 00000000..8b218233 --- /dev/null +++ b/tests/integration/delivery-resilience.test.ts @@ -0,0 +1,254 @@ +import type { + MessageStatusData, + StatusPublishEvent, +} from "@nhs-notify-client-callbacks/models"; +import { + awaitCallback, + awaitCallbacks, + countLogEntries, +} from "./helpers/cloudwatch"; +import { createMessageStatusPublishEvent } from "./helpers/event-factories"; +import { + buildMockWebhookTargetPath, + getClientConfig, +} from "./helpers/mock-client-config"; +import { assertCallbackHeaders } from "./helpers/signature"; +import { + awaitQueueMessage, + deleteMessage, + getQueueDepth, + purgeQueues, + sendSqsEvent, +} from "./helpers/sqs"; +import { + type TestContext, + createTestContext, + destroyTestContext, +} from "./helpers/test-context"; + +describe("Delivery Resilience", () => { + let ctx: TestContext; + + beforeAll(() => { + ctx = createTestContext(); + }); + + afterAll(() => { + destroyTestContext(ctx); + }); + + describe("Retry & Window Exhaustion", () => { + let dlqUrl: string; + let deliveryUrl: string; + + beforeAll(async () => { + const { clientId } = 
getClientConfig("clientShortRetry"); + dlqUrl = ctx.clientDlqUrl(clientId); + deliveryUrl = ctx.clientDeliveryUrl(clientId); + await purgeQueues(ctx.sqs, [dlqUrl, deliveryUrl]); + }); + + afterAll(async () => { + await purgeQueues(ctx.sqs, [dlqUrl, deliveryUrl]); + }); + + it("should exhaust the retry window on persistent 5xx and route to DLQ", async () => { + const { clientId } = getClientConfig("clientShortRetry"); + const messageId = `force-500-${crypto.randomUUID()}`; + + const event: StatusPublishEvent = + createMessageStatusPublishEvent({ data: { clientId, messageId } }); + + await sendSqsEvent(ctx.sqs, ctx.inboundQueueUrl, event); + + const dlqMessage = await awaitQueueMessage(ctx.sqs, dlqUrl, 90_000); + + expect(dlqMessage.Body).toBeDefined(); + const dlqPayload = JSON.parse(dlqMessage.Body as string); + expect(dlqPayload.payload.data[0].attributes.messageId).toBe(messageId); + + const attemptCount = await countLogEntries( + ctx.cwLogs, + ctx.webhookLogGroup, + `{ $.msg = "Forced status code response" && $.messageId = "${messageId}" }`, + ctx.startTime, + 2, + ); + expect(attemptCount).toBeGreaterThan(1); + + await deleteMessage(ctx.sqs, dlqUrl, dlqMessage); + }, 180_000); + + it("should exhaust the retry window on persistent 429 and route to DLQ", async () => { + const { clientId } = getClientConfig("clientShortRetry"); + const messageId = `force-429-${crypto.randomUUID()}`; + + const event: StatusPublishEvent = + createMessageStatusPublishEvent({ data: { clientId, messageId } }); + + await sendSqsEvent(ctx.sqs, ctx.inboundQueueUrl, event); + + const dlqMessage = await awaitQueueMessage(ctx.sqs, dlqUrl, 90_000); + + expect(dlqMessage.Body).toBeDefined(); + const dlqPayload = JSON.parse(dlqMessage.Body as string); + expect(dlqPayload.payload.data[0].attributes.messageId).toBe(messageId); + + const attemptCount = await countLogEntries( + ctx.cwLogs, + ctx.webhookLogGroup, + `{ $.msg = "Forced status code response" && $.messageId = "${messageId}" }`, + 
ctx.startTime, + 2, + ); + expect(attemptCount).toBeGreaterThan(1); + + await deleteMessage(ctx.sqs, dlqUrl, dlqMessage); + }, 180_000); + }); + + describe("Rate Limiting", () => { + const BURST_SIZE = 15; + let dlqUrl: string; + let deliveryUrl: string; + let httpsClientLogGroup: string; + + beforeAll(async () => { + const { clientId } = getClientConfig("clientRateLimit"); + dlqUrl = ctx.clientDlqUrl(clientId); + deliveryUrl = ctx.clientDeliveryUrl(clientId); + httpsClientLogGroup = ctx.logGroup(`https-client-${clientId}`); + await purgeQueues(ctx.sqs, [dlqUrl, deliveryUrl]); + }); + + afterAll(async () => { + await purgeQueues(ctx.sqs, [dlqUrl, deliveryUrl]); + }); + + it("should eventually deliver all events in a burst without dropping any to the DLQ", async () => { + const rateLimitConfig = getClientConfig("clientRateLimit"); + const targetPath = buildMockWebhookTargetPath("clientRateLimit"); + + const events = Array.from({ length: BURST_SIZE }, () => + createMessageStatusPublishEvent({ + data: { + clientId: rateLimitConfig.clientId, + messageId: `rate-limit-burst-${crypto.randomUUID()}`, + }, + }), + ); + + await Promise.all( + events.map((event) => + sendSqsEvent(ctx.sqs, ctx.inboundQueueUrl, event), + ), + ); + + const callbackMap = await awaitCallbacks( + ctx.cwLogs, + ctx.webhookLogGroup, + events.map((e) => e.data.messageId), + "MessageStatus", + 1, + ctx.startTime, + ); + + const deliveredIds = [...callbackMap.keys()]; + const expectedIds = events.map((e) => e.data.messageId); + expect(deliveredIds).toHaveLength(expectedIds.length); + expect(deliveredIds).toEqual(expect.arrayContaining(expectedIds)); + + for (const [, [callback]] of callbackMap) { + expect(callback.path).toBe(targetPath); + assertCallbackHeaders( + callback, + rateLimitConfig.apiKeyVar, + rateLimitConfig.applicationIdVar, + ); + } + + expect(await getQueueDepth(ctx.sqs, dlqUrl)).toBe(0); + + const rateLimitedCount = await countLogEntries( + ctx.cwLogs, + httpsClientLogGroup, + `{ $.msg = 
"Admission denied" && $.reason = "rate_limited" }`, + ctx.startTime, + 1, + ); + expect(rateLimitedCount).toBeGreaterThanOrEqual(1); + }, 180_000); + }); + + describe("Circuit Breaker", () => { + const CB_BURST_SIZE = 15; + let dlqUrl: string; + let deliveryUrl: string; + let httpsClientLogGroup: string; + + beforeAll(async () => { + const { clientId } = getClientConfig("clientCircuitBreaker"); + dlqUrl = ctx.clientDlqUrl(clientId); + deliveryUrl = ctx.clientDeliveryUrl(clientId); + httpsClientLogGroup = ctx.logGroup(`https-client-${clientId}`); + await purgeQueues(ctx.sqs, [dlqUrl, deliveryUrl]); + }); + + afterAll(async () => { + await purgeQueues(ctx.sqs, [dlqUrl, deliveryUrl]); + }); + + it("should open the circuit breaker after repeated failures and not affect other clients", async () => { + const cbConfig = getClientConfig("clientCircuitBreaker"); + const singleTargetConfig = getClientConfig("clientSingleTarget"); + const singleTargetPath = buildMockWebhookTargetPath("clientSingleTarget"); + + const cbEvents = Array.from({ length: CB_BURST_SIZE }, () => + createMessageStatusPublishEvent({ + data: { + clientId: cbConfig.clientId, + messageId: `force-500-cb-${crypto.randomUUID()}`, + }, + }), + ); + + const normalEvent = createMessageStatusPublishEvent({ + data: { + clientId: singleTargetConfig.clientId, + messageId: `cb-isolation-${crypto.randomUUID()}`, + }, + }); + + await Promise.all([ + ...cbEvents.map((event) => + sendSqsEvent(ctx.sqs, ctx.inboundQueueUrl, event), + ), + sendSqsEvent(ctx.sqs, ctx.inboundQueueUrl, normalEvent), + ]); + + const normalCallback = await awaitCallback( + ctx.cwLogs, + ctx.webhookLogGroup, + normalEvent.data.messageId, + "MessageStatus", + ctx.startTime, + ); + + expect(normalCallback.path).toBe(singleTargetPath); + assertCallbackHeaders( + normalCallback, + singleTargetConfig.apiKeyVar, + singleTargetConfig.applicationIdVar, + ); + + const circuitOpenCount = await countLogEntries( + ctx.cwLogs, + httpsClientLogGroup, + `{ $.msg 
= "Admission denied" && $.reason = "circuit_open" }`, + ctx.startTime, + 1, + ); + expect(circuitOpenCount).toBeGreaterThanOrEqual(1); + }, 180_000); + }); +}); diff --git a/tests/integration/dlq-redrive.test.ts b/tests/integration/dlq-redrive.test.ts index 639eadc4..325c82ed 100644 --- a/tests/integration/dlq-redrive.test.ts +++ b/tests/integration/dlq-redrive.test.ts @@ -1,100 +1,76 @@ -import { GetQueueAttributesCommand, SQSClient } from "@aws-sdk/client-sqs"; -import { CloudWatchLogsClient } from "@aws-sdk/client-cloudwatch-logs"; import type { MessageStatusData, StatusPublishEvent, } from "@nhs-notify-client-callbacks/models"; +import { awaitCallback, awaitCallbacks } from "./helpers/cloudwatch"; +import { createMessageStatusPublishEvent } from "./helpers/event-factories"; import { - buildInboundEventQueueUrl, - buildLambdaLogGroupName, - createCloudWatchLogsClient, - createSqsClient, - getDeploymentDetails, -} from "@nhs-notify-client-callbacks/test-support/helpers"; + CLIENT_FIXTURES, + type ClientFixtureKey, + getClientConfig, +} from "./helpers/mock-client-config"; +import sendEventToDlqAndRedrive from "./helpers/redrive"; import { assertCallbackHeaders } from "./helpers/signature"; import { - buildMockClientDlqQueueUrl, + awaitQueueMessage, + deleteMessage, ensureInboundQueueIsEmpty, + getQueueDepth, purgeQueues, sendSqsEvent, } from "./helpers/sqs"; import { - CLIENT_FIXTURES, - type ClientFixtureKey, - buildMockWebhookTargetPath, - getClientConfig, - getMockItClientConfig, -} from "./helpers/mock-client-config"; -import { awaitSignedCallbacksFromWebhookLogGroup } from "./helpers/cloudwatch"; -import { createMessageStatusPublishEvent } from "./helpers/event-factories"; -import sendEventToDlqAndRedrive from "./helpers/redrive"; + type TestContext, + createTestContext, + destroyTestContext, +} from "./helpers/test-context"; describe("DLQ Redrive", () => { - let sqsClient: SQSClient; - let cloudWatchClient: CloudWatchLogsClient; - let dlqQueueUrl!: string; 
- let allTargetDlqQueueUrls: string[]; - let inboundQueueUrl: string; - let webhookLogGroupName: string; + let ctx: TestContext; + let dlqUrl: string; + let deliveryUrl: string; + let allDlqUrls: string[]; beforeAll(async () => { - const deploymentDetails = getDeploymentDetails(); - const { clientId } = getMockItClientConfig(); - - sqsClient = createSqsClient(deploymentDetails); - cloudWatchClient = createCloudWatchLogsClient(deploymentDetails); - - inboundQueueUrl = buildInboundEventQueueUrl(deploymentDetails); - dlqQueueUrl = buildMockClientDlqQueueUrl(deploymentDetails, clientId); - allTargetDlqQueueUrls = ( - Object.keys(CLIENT_FIXTURES) as ClientFixtureKey[] - ).map((key) => - buildMockClientDlqQueueUrl( - deploymentDetails, - getClientConfig(key).clientId, - ), - ); - webhookLogGroupName = buildLambdaLogGroupName( - deploymentDetails, - "mock-webhook", + ctx = createTestContext(); + const { clientId } = getClientConfig("clientSingleTarget"); + + dlqUrl = ctx.clientDlqUrl(clientId); + deliveryUrl = ctx.clientDeliveryUrl(clientId); + allDlqUrls = (Object.keys(CLIENT_FIXTURES) as ClientFixtureKey[]).map( + (key) => ctx.clientDlqUrl(getClientConfig(key).clientId), ); - await purgeQueues(sqsClient, [inboundQueueUrl, ...allTargetDlqQueueUrls]); + await purgeQueues(ctx.sqs, [ + ctx.inboundQueueUrl, + deliveryUrl, + ...allDlqUrls, + ]); }); afterAll(async () => { - await purgeQueues(sqsClient, [inboundQueueUrl, ...allTargetDlqQueueUrls]); - sqsClient.destroy(); - cloudWatchClient.destroy(); + await purgeQueues(ctx.sqs, [ + ctx.inboundQueueUrl, + deliveryUrl, + ...allDlqUrls, + ]); + destroyTestContext(ctx); }); describe("Infrastructure validation", () => { it("should confirm a DLQ is accessible for all configured clients", async () => { - const responses = await Promise.all( - allTargetDlqQueueUrls.map((queueUrl) => - sqsClient.send( - new GetQueueAttributesCommand({ - QueueUrl: queueUrl, - AttributeNames: ["QueueArn", "ApproximateNumberOfMessages"], - }), - ), - ), 
+ const depths = await Promise.all( + allDlqUrls.map((url) => getQueueDepth(ctx.sqs, url)), ); - for (const response of responses) { - expect(response.Attributes?.QueueArn).toBeDefined(); + for (const depth of depths) { + expect(depth).toBeGreaterThanOrEqual(0); } }); it("should confirm the inbound event queue exists and is accessible", async () => { - const response = await sqsClient.send( - new GetQueueAttributesCommand({ - QueueUrl: inboundQueueUrl, - AttributeNames: ["QueueArn", "ApproximateNumberOfMessages"], - }), - ); - - expect(response.Attributes?.QueueArn).toBeDefined(); + const depth = await getQueueDepth(ctx.sqs, ctx.inboundQueueUrl); + expect(depth).toBeGreaterThanOrEqual(0); }); }); @@ -103,33 +79,32 @@ describe("DLQ Redrive", () => { const startTime = Date.now(); const event: StatusPublishEvent = createMessageStatusPublishEvent(); + const { payload: redrivePayload } = await sendEventToDlqAndRedrive( - sqsClient, - dlqQueueUrl, - inboundQueueUrl, + ctx.sqs, + dlqUrl, + ctx.inboundQueueUrl, event, ); expect(redrivePayload.id).toBe(event.id); - await ensureInboundQueueIsEmpty(sqsClient, inboundQueueUrl); + await ensureInboundQueueIsEmpty(ctx.sqs, ctx.inboundQueueUrl); - const callbacks = await awaitSignedCallbacksFromWebhookLogGroup( - cloudWatchClient, - webhookLogGroupName, + const callback = await awaitCallback( + ctx.cwLogs, + ctx.webhookLogGroup, event.data.messageId, "MessageStatus", startTime, - buildMockWebhookTargetPath(), ); - expect(callbacks.length).toBeGreaterThan(0); - expect(callbacks[0].payload).toMatchObject({ + expect(callback.payload).toMatchObject({ type: "MessageStatus", attributes: expect.objectContaining({ messageStatus: "delivered", }), }); - assertCallbackHeaders(callbacks[0]); + assertCallbackHeaders(callback); }, 120_000); it("should apply the same transformation logic to redriven events as original deliveries", async () => { @@ -155,47 +130,90 @@ describe("DLQ Redrive", () => { }, }); - await sendSqsEvent(sqsClient, 
inboundQueueUrl, directEvent); + await sendSqsEvent(ctx.sqs, ctx.inboundQueueUrl, directEvent); const { payload: dlqPayload } = await sendEventToDlqAndRedrive( - sqsClient, - dlqQueueUrl, - inboundQueueUrl, + ctx.sqs, + dlqUrl, + ctx.inboundQueueUrl, redriveEvent, ); expect(dlqPayload.data.messageId).toBe(redriveEvent.data.messageId); - const [directCallbacks, redriveCallbacks] = await Promise.all([ - awaitSignedCallbacksFromWebhookLogGroup( - cloudWatchClient, - webhookLogGroupName, - directEvent.data.messageId, - "MessageStatus", - startTime, - buildMockWebhookTargetPath(), - ), - awaitSignedCallbacksFromWebhookLogGroup( - cloudWatchClient, - webhookLogGroupName, - redriveEvent.data.messageId, - "MessageStatus", - startTime, - buildMockWebhookTargetPath(), - ), - ]); - - await ensureInboundQueueIsEmpty(sqsClient, inboundQueueUrl); - - expect(redriveCallbacks[0].payload).toMatchObject({ - type: directCallbacks[0].payload.type, + const callbackMap = await awaitCallbacks( + ctx.cwLogs, + ctx.webhookLogGroup, + [directEvent.data.messageId, redriveEvent.data.messageId], + "MessageStatus", + 1, + startTime, + ); + + const directCallback = callbackMap.get(directEvent.data.messageId)![0]; + const redriveCallback = callbackMap.get(redriveEvent.data.messageId)![0]; + + await ensureInboundQueueIsEmpty(ctx.sqs, ctx.inboundQueueUrl); + + expect(redriveCallback.payload).toMatchObject({ + type: directCallback.payload.type, attributes: expect.objectContaining({ messageStatus: ( - directCallbacks[0].payload.attributes as { messageStatus?: string } + directCallback.payload.attributes as { messageStatus?: string } ).messageStatus, }), }); - assertCallbackHeaders(redriveCallbacks[0]); + assertCallbackHeaders(redriveCallback); }, 120_000); }); + + describe("Delivery DLQ redrive", () => { + it("should redrive a 4xx-failed message from the delivery DLQ back through the delivery queue", async () => { + const redriveStartTime = Date.now(); + const forceMessageId = 
`force-400-redrive-${crypto.randomUUID()}`; + + const failingEvent: StatusPublishEvent = + createMessageStatusPublishEvent({ + data: { messageId: forceMessageId }, + }); + + await sendSqsEvent(ctx.sqs, ctx.inboundQueueUrl, failingEvent); + + const dlqMessage = await awaitQueueMessage(ctx.sqs, dlqUrl, 90_000); + + expect(dlqMessage.Body).toBeDefined(); + expect(dlqMessage.MessageAttributes?.ERROR_CODE?.StringValue).toBe( + "HTTP_CLIENT_ERROR", + ); + + const dlqBody = JSON.parse(dlqMessage.Body as string) as { + payload: { data: { attributes: { messageId: string } }[] }; + subscriptionId: string; + targetId: string; + }; + + const redriveMessageId = `redriven-dlq-${crypto.randomUUID()}`; + dlqBody.payload.data[0].attributes.messageId = redriveMessageId; + + await sendSqsEvent(ctx.sqs, deliveryUrl, dlqBody); + await deleteMessage(ctx.sqs, dlqUrl, dlqMessage); + + const callback = await awaitCallback( + ctx.cwLogs, + ctx.webhookLogGroup, + redriveMessageId, + "MessageStatus", + redriveStartTime, + ); + + expect(callback.payload).toMatchObject({ + type: "MessageStatus", + attributes: expect.objectContaining({ + messageId: redriveMessageId, + messageStatus: "delivered", + }), + }); + assertCallbackHeaders(callback); + }, 180_000); + }); }); diff --git a/tests/integration/fixtures/subscriptions/mock-client-2.json b/tests/integration/fixtures/subscriptions/mock-client-fan-out.json similarity index 97% rename from tests/integration/fixtures/subscriptions/mock-client-2.json rename to tests/integration/fixtures/subscriptions/mock-client-fan-out.json index 50997fb6..14985d4c 100644 --- a/tests/integration/fixtures/subscriptions/mock-client-2.json +++ b/tests/integration/fixtures/subscriptions/mock-client-fan-out.json @@ -1,5 +1,5 @@ { - "clientId": "mock-client-2", + "clientId": "mock-client-fan-out", "subscriptions": [ { "messageStatuses": [ diff --git a/tests/integration/fixtures/subscriptions/mock-client-short-retry.json 
b/tests/integration/fixtures/subscriptions/mock-client-short-retry.json new file mode 100644 index 00000000..d6528102 --- /dev/null +++ b/tests/integration/fixtures/subscriptions/mock-client-short-retry.json @@ -0,0 +1,38 @@ +{ + "clientId": "mock-client-short-retry", + "subscriptions": [ + { + "messageStatuses": [ + "DELIVERED", + "FAILED" + ], + "subscriptionId": "sub-sr-msg-001", + "subscriptionType": "MessageStatus", + "targetIds": [ + "target-sr-001" + ] + } + ], + "targets": [ + { + "apiKey": { + "headerName": "x-api-key", + "headerValue": "REPLACED_BY_TERRAFORM" + }, + "delivery": { + "maxRetryDurationSeconds": 10, + "mtls": { + "certPinning": { + "enabled": false + }, + "enabled": false + } + }, + "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", + "invocationMethod": "POST", + "invocationRateLimit": 10, + "targetId": "target-sr-001", + "type": "API" + } + ] +} diff --git a/tests/integration/fixtures/subscriptions/mock-client-1.json b/tests/integration/fixtures/subscriptions/mock-client-single-target.json similarity index 96% rename from tests/integration/fixtures/subscriptions/mock-client-1.json rename to tests/integration/fixtures/subscriptions/mock-client-single-target.json index 4dd6c078..41422f4b 100644 --- a/tests/integration/fixtures/subscriptions/mock-client-1.json +++ b/tests/integration/fixtures/subscriptions/mock-client-single-target.json @@ -1,5 +1,5 @@ { - "clientId": "mock-client-1", + "clientId": "mock-client-single-target", "subscriptions": [ { "messageStatuses": [ diff --git a/tests/integration/helpers/cloudwatch.ts b/tests/integration/helpers/cloudwatch.ts index 9ee13739..d66b18ce 100644 --- a/tests/integration/helpers/cloudwatch.ts +++ b/tests/integration/helpers/cloudwatch.ts @@ -6,266 +6,255 @@ import { logger } from "@nhs-notify-client-callbacks/logger"; import type { CallbackItem } from "@nhs-notify-client-callbacks/models"; import { TimeoutError, waitUntil } from "async-wait-until"; -const CALLBACK_WAIT_TIMEOUT_MS = 60_000; 
-const METRICS_WAIT_TIMEOUT_MS = 60_000; +const WAIT_TIMEOUT_MS = 60_000; const POLL_INTERVAL_MS = 2000; -const CLOUDWATCH_QUERY_LOOKBACK_MS = Number( - process.env.CLOUDWATCH_QUERY_LOOKBACK_MS ?? 5000, -); +const LOOKBACK_MS = Number(process.env.CLOUDWATCH_QUERY_LOOKBACK_MS ?? 5000); type LogEntry = { msg: string; - correlationId?: string; + messageId?: string; callbackType?: string; - clientId?: string; apiKey?: string; signature?: string; payload?: string; path?: string; + isMtls?: boolean; }; export type SignedCallback = { payload: CallbackItem; path: string; + isMtls: boolean; headers: { "x-api-key": string; "x-hmac-sha256-signature": string; }; }; -async function querySignedCallbacksFromWebhookLogGroup( - client: CloudWatchLogsClient, - logGroupName: string, - messageId: string, - callbackType: CallbackItem["type"], - startTime: number, -): Promise { - const filterPattern = `{ $.msg = "Callback received" && $.messageId = "${messageId}" && $.callbackType = "${callbackType}" }`; - const queryStartTime = Math.max(0, startTime - CLOUDWATCH_QUERY_LOOKBACK_MS); - - const response = await client.send( - new FilterLogEventsCommand({ - logGroupName, - startTime: queryStartTime, - filterPattern, - }), - ); - - const events = response.events ?? []; - const callbacks: SignedCallback[] = []; - - for (const event of events) { - if (event.message) { - try { - const entry = JSON.parse(event.message) as LogEntry; - if (entry.signature !== undefined && entry.payload) { - callbacks.push({ - payload: JSON.parse(entry.payload) as CallbackItem, - path: entry.path ?? "", - headers: { - "x-api-key": entry.apiKey ?? 
"", - "x-hmac-sha256-signature": entry.signature, - }, - }); - } - } catch { - // skip unparseable entries - } - } - } - - return callbacks; -} - -async function pollUntilFound( - poll: () => Promise, - timeoutMs: number, - timeoutMessage: string, -): Promise { - let results: T[] = []; - +// eslint-disable-next-line sonarjs/function-return-type -- returns SignedCallback | undefined consistently +function parseCallback( + message: string, + messageIdSet: Set, +): SignedCallback | undefined { try { - await waitUntil( - async () => { - results = await poll(); - return results.length > 0; + const entry = JSON.parse(message) as LogEntry; + if ( + !entry.messageId || + !messageIdSet.has(entry.messageId) || + entry.signature === undefined || + !entry.payload + ) + return undefined; + + return { + payload: JSON.parse(entry.payload) as CallbackItem, + path: entry.path ?? "", + isMtls: entry.isMtls ?? false, + headers: { + "x-api-key": entry.apiKey ?? "", + "x-hmac-sha256-signature": entry.signature, }, - { timeout: timeoutMs, intervalBetweenAttempts: POLL_INTERVAL_MS }, - ); - } catch (error) { - if (error instanceof TimeoutError) { - logger.warn(timeoutMessage); - } else { - throw error; - } + }; + } catch { + return undefined; } - - return results; } -export async function awaitSignedCallbacksFromWebhookLogGroup( +async function pollCallbacks( client: CloudWatchLogsClient, logGroupName: string, - messageId: string, + messageIds: string[], callbackType: CallbackItem["type"], + expectedPerMessage: number, startTime: number, - path: string, ): Promise { - const queryStartTime = Math.max(0, startTime - CLOUDWATCH_QUERY_LOOKBACK_MS); - logger.debug( - `Waiting for callback in webhook CloudWatch log group (messageId=${messageId}, path=${path}, logGroup=${logGroupName}, startTimeIso=${new Date(startTime).toISOString()}, queryStartTimeIso=${new Date(queryStartTime).toISOString()}, lookbackMs=${CLOUDWATCH_QUERY_LOOKBACK_MS})`, - ); - - const callbacks = await pollUntilFound( - () 
=> - querySignedCallbacksFromWebhookLogGroup( - client, - logGroupName, - messageId, - callbackType, - startTime, - ), - CALLBACK_WAIT_TIMEOUT_MS, - `Timed out waiting for callback in webhook CloudWatch log group (messageId=${messageId}, callbackType=${callbackType}, path=${path}, timeoutMs=${CALLBACK_WAIT_TIMEOUT_MS})`, - ); - - if (callbacks.length !== 1) { - throw new Error( - `Expected exactly 1 callback for messageId="${messageId}" callbackType="${callbackType}", but found ${callbacks.length}`, - ); - } - - if (callbacks[0].path !== path) { - throw new Error( - `Expected callback path "${path}" for messageId="${messageId}", but got "${callbacks[0].path}"`, - ); - } - - return callbacks; -} + const messageIdSet = new Set(messageIds); + const expectedTotal = messageIds.length * expectedPerMessage; + const queryStartTime = Math.max(0, startTime - LOOKBACK_MS); + const filterPattern = `{ $.msg = "Callback received" && $.callbackType = "${callbackType}" }`; -export async function awaitSignedCallbacksByCountFromWebhookLogGroup( - client: CloudWatchLogsClient, - logGroupName: string, - messageId: string, - callbackType: CallbackItem["type"], - expectedCount: number, - startTime: number, -): Promise { logger.debug( - `Waiting for callbacks in webhook CloudWatch log group (messageId=${messageId}, callbackType=${callbackType}, expectedCount=${expectedCount}, logGroup=${logGroupName})`, + `Waiting for ${expectedTotal} callback(s) (type=${callbackType}, messages=${messageIds.length}, logGroup=${logGroupName})`, ); - let callbacks: SignedCallback[] = []; + let matched: SignedCallback[] = []; try { await waitUntil( async () => { - callbacks = await querySignedCallbacksFromWebhookLogGroup( - client, - logGroupName, - messageId, - callbackType, - startTime, + const response = await client.send( + new FilterLogEventsCommand({ + logGroupName, + startTime: queryStartTime, + filterPattern, + }), ); - return callbacks.length === expectedCount; - }, - { - timeout: 
CALLBACK_WAIT_TIMEOUT_MS, - intervalBetweenAttempts: POLL_INTERVAL_MS, + + matched = (response.events ?? []) + .filter((event): event is typeof event & { message: string } => + Boolean(event.message), + ) + .map((event) => parseCallback(event.message, messageIdSet)) + .filter((cb): cb is SignedCallback => cb !== undefined); + + return matched.length >= expectedTotal; }, + { timeout: WAIT_TIMEOUT_MS, intervalBetweenAttempts: POLL_INTERVAL_MS }, ); } catch (error) { if (error instanceof TimeoutError) { logger.warn( - `Timed out waiting for callbacks in webhook CloudWatch log group (messageId=${messageId}, callbackType=${callbackType}, expectedCount=${expectedCount}, timeoutMs=${CALLBACK_WAIT_TIMEOUT_MS})`, + `Timed out waiting for callbacks (expected=${expectedTotal}, found=${matched.length})`, ); } else { throw error; } } - if (callbacks.length !== expectedCount) { + if (matched.length !== expectedTotal) { + const foundIds = new Set( + matched.map( + (cb) => + (cb.payload.attributes as { messageId?: string }).messageId ?? 
"", + ), + ); + const missingIds = messageIds.filter((id) => !foundIds.has(id)); + logger.warn("Missing callbacks", { + callbackType, + expectedTotal, + foundCount: matched.length, + missingIds, + }); throw new Error( - `Expected exactly ${expectedCount} callbacks for messageId="${messageId}" callbackType="${callbackType}", but found ${callbacks.length}`, + `Expected ${expectedTotal} callback(s) for type="${callbackType}", found ${matched.length}`, ); } - return callbacks; + return matched; } -type EmfEntry = Record; - -function collectMetricNamesFromEvent( - message: string, - metricNames: string[], - found: Set, -): void { - try { - const entry = JSON.parse(message) as EmfEntry; - if (entry._aws) { - for (const name of metricNames) { - if (name in entry) found.add(name); - } - } - } catch { - // skip unparseable entries - } +export async function awaitCallback( + client: CloudWatchLogsClient, + logGroupName: string, + messageId: string, + callbackType: CallbackItem["type"], + startTime: number, +): Promise { + const [callback] = await pollCallbacks( + client, + logGroupName, + [messageId], + callbackType, + 1, + startTime, + ); + return callback; } -async function queryEmfMetricsFromLogGroup( +export async function awaitCallbacks( client: CloudWatchLogsClient, logGroupName: string, - metricNames: string[], + messageIds: string[], + callbackType: CallbackItem["type"], + expectedPerMessage: number, startTime: number, -): Promise> { - const queryStartTime = Math.max(0, startTime - CLOUDWATCH_QUERY_LOOKBACK_MS); - const conditions = metricNames.map((name) => `$.${name} > 0`).join(" || "); - const filterPattern = `{ ${conditions} }`; - - const response = await client.send( - new FilterLogEventsCommand({ - logGroupName, - startTime: queryStartTime, - filterPattern, - }), +): Promise> { + const results = await pollCallbacks( + client, + logGroupName, + messageIds, + callbackType, + expectedPerMessage, + startTime, ); - const found = new Set(); - for (const event of 
response.events ?? []) { - if (event.message) { - collectMetricNamesFromEvent(event.message, metricNames, found); - } + const map = new Map(); + for (const cb of results) { + const id = + (cb.payload.attributes as { messageId?: string }).messageId ?? ""; + const existing = map.get(id) ?? []; + existing.push(cb); + map.set(id, existing); } - return found; + return map; } -export async function awaitAllEmfMetricsInLogGroup( +export async function awaitEmfMetrics( client: CloudWatchLogsClient, logGroupName: string, metricNames: string[], startTime: number, ): Promise { - const queryStartTime = Math.max(0, startTime - CLOUDWATCH_QUERY_LOOKBACK_MS); - const queryStartTimeIso = new Date(queryStartTime).toISOString(); - const startTimeIso = new Date(startTime).toISOString(); + const queryStartTime = Math.max(0, startTime - LOOKBACK_MS); + const conditions = metricNames.map((name) => `$.${name} > 0`).join(" || "); + const filterPattern = `{ ${conditions} }`; + logger.debug( - `Waiting for EMF metrics in CloudWatch log group (metrics=${metricNames.join(",")}, logGroup=${logGroupName}, startTimeIso=${startTimeIso}, queryStartTimeIso=${queryStartTimeIso}, lookbackMs=${CLOUDWATCH_QUERY_LOOKBACK_MS})`, + `Waiting for EMF metrics [${metricNames.join(", ")}] in ${logGroupName}`, ); await waitUntil( async () => { - const found = await queryEmfMetricsFromLogGroup( - client, - logGroupName, - metricNames, - startTime, + const response = await client.send( + new FilterLogEventsCommand({ + logGroupName, + startTime: queryStartTime, + filterPattern, + }), ); + + const found = new Set(); + for (const event of response.events ?? []) { + try { + const entry = JSON.parse(event.message ?? 
"") as Record< + string, + unknown + >; + if (entry._aws) { + for (const name of metricNames) { + if (name in entry) found.add(name); + } + } + } catch { + // skip unparseable entries + } + } return metricNames.every((name) => found.has(name)); }, - { - timeout: METRICS_WAIT_TIMEOUT_MS, - intervalBetweenAttempts: POLL_INTERVAL_MS, - }, + { timeout: WAIT_TIMEOUT_MS, intervalBetweenAttempts: POLL_INTERVAL_MS }, ); } + +export async function countLogEntries( + client: CloudWatchLogsClient, + logGroupName: string, + filterPattern: string, + startTime: number, + minCount: number, +): Promise { + const queryStartTime = Math.max(0, startTime - LOOKBACK_MS); + + let count = 0; + try { + await waitUntil( + async () => { + const response = await client.send( + new FilterLogEventsCommand({ + logGroupName, + startTime: queryStartTime, + filterPattern, + }), + ); + count = (response.events ?? []).length; + return count >= minCount; + }, + { timeout: WAIT_TIMEOUT_MS, intervalBetweenAttempts: POLL_INTERVAL_MS }, + ); + } catch (error) { + if (!(error instanceof TimeoutError)) { + throw error; + } + } + + return count; +} diff --git a/tests/integration/helpers/event-factories.ts b/tests/integration/helpers/event-factories.ts index 35f7f2e8..015bbced 100644 --- a/tests/integration/helpers/event-factories.ts +++ b/tests/integration/helpers/event-factories.ts @@ -1,6 +1,5 @@ import type { ChannelStatusData, - ClientCallbackPayload, MessageStatusData, StatusPublishEvent, } from "@nhs-notify-client-callbacks/models"; @@ -18,37 +17,6 @@ type ChannelEventOverrides = { data?: Partial; }; -type DeliveryMessage = { - payload: ClientCallbackPayload; - subscriptions: string[]; - targetId: string; -}; - -export function createDeliveryMessage( - overrides?: Partial, -): DeliveryMessage { - const config = getMockItClientConfig(); - const targetId = - overrides?.targetId ?? config.targets[0]?.targetId ?? "target-001"; - - return { - payload: - overrides?.payload ?? 
- ({ - data: [ - { - type: "MessageStatus", - attributes: { messageStatus: "delivered" }, - links: { message: "https://api.example.invalid/messages/msg-001" }, - meta: { idempotencyKey: crypto.randomUUID() }, - }, - ], - } as ClientCallbackPayload), - subscriptions: overrides?.subscriptions ?? ["sub-001"], - targetId, - }; -} - export function createMessageStatusPublishEvent( overrides?: MessageEventOverrides, ): StatusPublishEvent { diff --git a/tests/integration/helpers/mock-client-config.ts b/tests/integration/helpers/mock-client-config.ts index 950e699d..52bb3570 100644 --- a/tests/integration/helpers/mock-client-config.ts +++ b/tests/integration/helpers/mock-client-config.ts @@ -1,6 +1,6 @@ import { readFileSync } from "node:fs"; import path from "node:path"; -import type seedConfigJson from "../fixtures/subscriptions/mock-client-1.json"; +import type seedConfigJson from "../fixtures/subscriptions/mock-client-single-target.json"; type ClientFixtureShape = typeof seedConfigJson; @@ -10,21 +10,36 @@ export type MockItClientConfig = ClientFixtureShape & { }; export const CLIENT_FIXTURES = { - client1: { - fixture: "mock-client-1.json", + clientSingleTarget: { + fixture: "mock-client-single-target.json", apiKeyVar: "MOCK_CLIENT_API_KEY", applicationIdVar: "MOCK_CLIENT_APPLICATION_ID", }, - client2: { - fixture: "mock-client-2.json", - apiKeyVar: "MOCK_CLIENT_2_API_KEY", - applicationIdVar: "MOCK_CLIENT_2_APPLICATION_ID", + clientFanOut: { + fixture: "mock-client-fan-out.json", + apiKeyVar: "MOCK_CLIENT_FAN_OUT_API_KEY", + applicationIdVar: "MOCK_CLIENT_FAN_OUT_APPLICATION_ID", }, clientMtls: { fixture: "mock-client-mtls.json", apiKeyVar: "MOCK_CLIENT_MTLS_API_KEY", applicationIdVar: "MOCK_CLIENT_MTLS_APPLICATION_ID", }, + clientRateLimit: { + fixture: "mock-client-rate-limit.json", + apiKeyVar: "MOCK_CLIENT_RATE_LIMIT_API_KEY", + applicationIdVar: "MOCK_CLIENT_RATE_LIMIT_APPLICATION_ID", + }, + clientCircuitBreaker: { + fixture: "mock-client-circuit-breaker.json", 
+ apiKeyVar: "MOCK_CLIENT_CIRCUIT_BREAKER_API_KEY", + applicationIdVar: "MOCK_CLIENT_CIRCUIT_BREAKER_APPLICATION_ID", + }, + clientShortRetry: { + fixture: "mock-client-short-retry.json", + apiKeyVar: "MOCK_CLIENT_SHORT_RETRY_API_KEY", + applicationIdVar: "MOCK_CLIENT_SHORT_RETRY_APPLICATION_ID", + }, } as const; export type ClientFixtureKey = keyof typeof CLIENT_FIXTURES; @@ -45,11 +60,7 @@ export function getClientConfig(key: ClientFixtureKey): MockItClientConfig { } export function getMockItClientConfig(): MockItClientConfig { - return getClientConfig("client1"); -} - -export function getMockItClient2Config(): MockItClientConfig { - return getClientConfig("client2"); + return getClientConfig("clientSingleTarget"); } function buildWebhookTargetPaths(key: ClientFixtureKey): string[] { @@ -58,7 +69,7 @@ function buildWebhookTargetPaths(key: ClientFixtureKey): string[] { } export function buildMockWebhookTargetPath( - key: ClientFixtureKey = "client1", + key: ClientFixtureKey = "clientSingleTarget", ): string { const paths = buildWebhookTargetPaths(key); @@ -70,7 +81,7 @@ export function buildMockWebhookTargetPath( } export function buildMockWebhookTargetPaths( - key: ClientFixtureKey = "client1", + key: ClientFixtureKey = "clientSingleTarget", ): string[] { return buildWebhookTargetPaths(key); } diff --git a/tests/integration/helpers/sqs.ts b/tests/integration/helpers/sqs.ts index 747f746b..5cdcc3a9 100644 --- a/tests/integration/helpers/sqs.ts +++ b/tests/integration/helpers/sqs.ts @@ -1,5 +1,6 @@ import { ChangeMessageVisibilityCommand, + DeleteMessageCommand, GetQueueAttributesCommand, type Message, PurgeQueueCommand, @@ -168,6 +169,7 @@ async function receiveOneMessage(client: SQSClient, queueUrl: string) { export async function awaitQueueMessage( client: SQSClient, queueUrl: string, + timeoutMs: number = QUEUE_WAIT_TIMEOUT_MS, ): Promise { let message: Message | undefined; @@ -179,13 +181,13 @@ export async function awaitQueueMessage( }, { 
intervalBetweenAttempts: POLL_INTERVAL_MS, - timeout: QUEUE_WAIT_TIMEOUT_MS, + timeout: timeoutMs, }, ); if (!message) { throw new Error( - `Timed out after ${QUEUE_WAIT_TIMEOUT_MS}ms waiting for a message to appear in ${queueUrl}`, + `Timed out after ${timeoutMs}ms waiting for a message to appear in ${queueUrl}`, ); } @@ -249,3 +251,26 @@ export async function awaitQueueMessageByMessageId( return matchedMessage; } + +export async function deleteMessage( + client: SQSClient, + queueUrl: string, + message: Message, +): Promise { + await client.send( + new DeleteMessageCommand({ + QueueUrl: queueUrl, + ReceiptHandle: message.ReceiptHandle!, + }), + ); +} + +export async function getQueueDepth( + client: SQSClient, + queueUrl: string, +): Promise { + return getQueueMessageCount(client, queueUrl, [ + "ApproximateNumberOfMessages", + "ApproximateNumberOfMessagesNotVisible", + ]); +} diff --git a/tests/integration/helpers/status-events.ts b/tests/integration/helpers/status-events.ts deleted file mode 100644 index 1bccf0bb..00000000 --- a/tests/integration/helpers/status-events.ts +++ /dev/null @@ -1,89 +0,0 @@ -import { CloudWatchLogsClient } from "@aws-sdk/client-cloudwatch-logs"; -import { SQSClient } from "@aws-sdk/client-sqs"; -import type { - ChannelStatusData, - MessageStatusData, - StatusPublishEvent, -} from "@nhs-notify-client-callbacks/models"; - -import { - type SignedCallback, - awaitSignedCallbacksFromWebhookLogGroup, -} from "./cloudwatch"; -import { ensureInboundQueueIsEmpty, sendSqsEvent } from "./sqs"; - -async function processStatusEvent< - T extends MessageStatusData | ChannelStatusData, ->( - { - CloudWatchLogsClient: cloudWatchClient, - SQSClient: sqsClient, - }: { CloudWatchLogsClient: CloudWatchLogsClient; SQSClient: SQSClient }, - callbackEventQueueUrl: string, - webhookLogGroupName: string, - event: StatusPublishEvent, - callbackType: SignedCallback["payload"]["type"], - webhookPath: string, - startTime: number, -): Promise { - const 
sendMessageResponse = await sendSqsEvent( - sqsClient, - callbackEventQueueUrl, - event, - ); - - if (!sendMessageResponse.MessageId) { - throw new Error("Expected SQS send response to include MessageId"); - } - - await ensureInboundQueueIsEmpty(sqsClient, callbackEventQueueUrl); - - return awaitSignedCallbacksFromWebhookLogGroup( - cloudWatchClient, - webhookLogGroupName, - event.data.messageId, - callbackType, - startTime, - webhookPath, - ); -} - -export async function processMessageStatusEvent( - sqsClient: SQSClient, - cloudWatchClient: CloudWatchLogsClient, - callbackEventQueueUrl: string, - webhookLogGroupName: string, - messageStatusEvent: StatusPublishEvent, - webhookPath: string, - startTime: number, -): Promise { - return processStatusEvent( - { CloudWatchLogsClient: cloudWatchClient, SQSClient: sqsClient }, - callbackEventQueueUrl, - webhookLogGroupName, - messageStatusEvent, - "MessageStatus", - webhookPath, - startTime, - ); -} - -export async function processChannelStatusEvent( - sqsClient: SQSClient, - cloudWatchClient: CloudWatchLogsClient, - callbackEventQueueUrl: string, - webhookLogGroupName: string, - channelStatusEvent: StatusPublishEvent, - webhookPath: string, - startTime: number, -): Promise { - return processStatusEvent( - { CloudWatchLogsClient: cloudWatchClient, SQSClient: sqsClient }, - callbackEventQueueUrl, - webhookLogGroupName, - channelStatusEvent, - "ChannelStatus", - webhookPath, - startTime, - ); -} diff --git a/tests/integration/helpers/test-context.ts b/tests/integration/helpers/test-context.ts new file mode 100644 index 00000000..df5a31f5 --- /dev/null +++ b/tests/integration/helpers/test-context.ts @@ -0,0 +1,52 @@ +import type { CloudWatchLogsClient } from "@aws-sdk/client-cloudwatch-logs"; +import type { SQSClient } from "@aws-sdk/client-sqs"; +import type { DeploymentDetails } from "@nhs-notify-client-callbacks/test-support/helpers"; +import { + buildInboundEventDlqQueueUrl, + buildInboundEventQueueUrl, + 
buildLambdaLogGroupName, + createCloudWatchLogsClient, + createSqsClient, + getDeploymentDetails, +} from "@nhs-notify-client-callbacks/test-support/helpers"; +import { + buildMockClientDeliveryQueueUrl, + buildMockClientDlqQueueUrl, +} from "./sqs"; + +export type TestContext = { + sqs: SQSClient; + cwLogs: CloudWatchLogsClient; + deployment: DeploymentDetails; + inboundQueueUrl: string; + inboundDlqUrl: string; + webhookLogGroup: string; + startTime: number; + clientDlqUrl(clientId: string): string; + clientDeliveryUrl(clientId: string): string; + logGroup(name: string): string; +}; + +export function createTestContext(): TestContext { + const deployment = getDeploymentDetails(); + + return { + sqs: createSqsClient(deployment), + cwLogs: createCloudWatchLogsClient(deployment), + deployment, + inboundQueueUrl: buildInboundEventQueueUrl(deployment), + inboundDlqUrl: buildInboundEventDlqQueueUrl(deployment), + webhookLogGroup: buildLambdaLogGroupName(deployment, "mock-webhook"), + startTime: Date.now(), + clientDlqUrl: (clientId) => + buildMockClientDlqQueueUrl(deployment, clientId), + clientDeliveryUrl: (clientId) => + buildMockClientDeliveryQueueUrl(deployment, clientId), + logGroup: (name) => buildLambdaLogGroupName(deployment, name), + }; +} + +export function destroyTestContext(ctx: TestContext): void { + ctx.sqs.destroy(); + ctx.cwLogs.destroy(); +} diff --git a/tests/integration/inbound-sqs-to-webhook.test.ts b/tests/integration/inbound-sqs-to-webhook.test.ts index d75ad402..13d8ec14 100644 --- a/tests/integration/inbound-sqs-to-webhook.test.ts +++ b/tests/integration/inbound-sqs-to-webhook.test.ts @@ -1,19 +1,9 @@ -import { CloudWatchLogsClient } from "@aws-sdk/client-cloudwatch-logs"; -import { DeleteMessageCommand, SQSClient } from "@aws-sdk/client-sqs"; -import { - type ChannelStatusData, - type MessageStatusData, - type StatusPublishEvent, +import type { + ChannelStatusData, + MessageStatusData, + StatusPublishEvent, } from 
"@nhs-notify-client-callbacks/models"; -import { - buildInboundEventDlqQueueUrl, - buildInboundEventQueueUrl, - buildLambdaLogGroupName, - createCloudWatchLogsClient, - createSqsClient, - getDeploymentDetails, -} from "@nhs-notify-client-callbacks/test-support/helpers"; -import { awaitSignedCallbacksByCountFromWebhookLogGroup } from "./helpers/cloudwatch"; +import { awaitCallback, awaitCallbacks } from "./helpers/cloudwatch"; import { createChannelStatusPublishEvent, createMessageStatusPublishEvent, @@ -21,151 +11,112 @@ import { import { buildMockWebhookTargetPath, buildMockWebhookTargetPaths, - getMockItClient2Config, - getMockItClientConfig, + getClientConfig, } from "./helpers/mock-client-config"; import { assertCallbackHeaders } from "./helpers/signature"; import { awaitQueueMessage, awaitQueueMessageByMessageId, - buildMockClientDeliveryQueueUrl, - buildMockClientDlqQueueUrl, + deleteMessage, ensureInboundQueueIsEmpty, purgeQueues, sendSqsEvent, } from "./helpers/sqs"; import { - processChannelStatusEvent, - processMessageStatusEvent, -} from "./helpers/status-events"; - -function compareStrings(a: string, b: string): number { - if (a > b) return 1; - if (a < b) return -1; - return 0; -} + type TestContext, + createTestContext, + destroyTestContext, +} from "./helpers/test-context"; describe("SQS to Webhook Integration", () => { - let sqsClient: SQSClient; - let cloudWatchClient: CloudWatchLogsClient; - let callbackEventQueueUrl: string; - let clientDlqQueueUrl: string; - let clientDeliveryQueueUrl: string; - let inboundEventDlqQueueUrl: string; - let webhookLogGroupName: string; - let webhookTargetPath: string; - let startTime: number; + let ctx: TestContext; + let clientDlqUrl: string; + let clientDeliveryUrl: string; beforeAll(async () => { - const deploymentDetails = getDeploymentDetails(); - const { clientId } = getMockItClientConfig(); - - sqsClient = createSqsClient(deploymentDetails); - cloudWatchClient = createCloudWatchLogsClient(deploymentDetails); 
- callbackEventQueueUrl = buildInboundEventQueueUrl(deploymentDetails); - clientDlqQueueUrl = buildMockClientDlqQueueUrl(deploymentDetails, clientId); - clientDeliveryQueueUrl = buildMockClientDeliveryQueueUrl( - deploymentDetails, - clientId, - ); - inboundEventDlqQueueUrl = buildInboundEventDlqQueueUrl(deploymentDetails); - webhookLogGroupName = buildLambdaLogGroupName( - deploymentDetails, - "mock-webhook", - ); - webhookTargetPath = buildMockWebhookTargetPath(); - startTime = Date.now(); - await purgeQueues(sqsClient, [ - inboundEventDlqQueueUrl, - clientDlqQueueUrl, - clientDeliveryQueueUrl, - callbackEventQueueUrl, + ctx = createTestContext(); + const { clientId } = getClientConfig("clientSingleTarget"); + clientDlqUrl = ctx.clientDlqUrl(clientId); + clientDeliveryUrl = ctx.clientDeliveryUrl(clientId); + await purgeQueues(ctx.sqs, [ + ctx.inboundDlqUrl, + clientDlqUrl, + clientDeliveryUrl, + ctx.inboundQueueUrl, ]); }); afterAll(async () => { - await purgeQueues(sqsClient, [ - inboundEventDlqQueueUrl, - clientDlqQueueUrl, - clientDeliveryQueueUrl, - callbackEventQueueUrl, + await purgeQueues(ctx.sqs, [ + ctx.inboundDlqUrl, + clientDlqUrl, + clientDeliveryUrl, + ctx.inboundQueueUrl, ]); - - sqsClient.destroy(); - cloudWatchClient.destroy(); + destroyTestContext(ctx); }); describe("Message Status Event Flow", () => { it("should process message status event from SQS to webhook", async () => { - const messageStatusEvent: StatusPublishEvent = - createMessageStatusPublishEvent(); - - const callbacks = await processMessageStatusEvent( - sqsClient, - cloudWatchClient, - callbackEventQueueUrl, - webhookLogGroupName, - messageStatusEvent, - webhookTargetPath, - startTime, - ); + const event = createMessageStatusPublishEvent(); - expect(callbacks).toHaveLength(1); + await sendSqsEvent(ctx.sqs, ctx.inboundQueueUrl, event); + await ensureInboundQueueIsEmpty(ctx.sqs, ctx.inboundQueueUrl); - expect(callbacks[0].payload).toMatchObject({ - type: "MessageStatus", + const 
callback = await awaitCallback( + ctx.cwLogs, + ctx.webhookLogGroup, + event.data.messageId, + "MessageStatus", + ctx.startTime, + ); - attributes: expect.objectContaining({ - messageStatus: "delivered", - }), + expect(callback.payload).toMatchObject({ + type: "MessageStatus", + attributes: expect.objectContaining({ messageStatus: "delivered" }), }); - - assertCallbackHeaders(callbacks[0]); + assertCallbackHeaders(callback); }, 120_000); it("should fan out a message status event to subscription with multiple target endpoints", async () => { - const client2Config = getMockItClient2Config(); - const expectedPaths = buildMockWebhookTargetPaths("client2"); + const fanOutConfig = getClientConfig("clientFanOut"); + const expectedPaths = buildMockWebhookTargetPaths("clientFanOut"); - const messageStatusEvent: StatusPublishEvent = + const event: StatusPublishEvent = createMessageStatusPublishEvent({ - data: { - clientId: client2Config.clientId, - }, + data: { clientId: fanOutConfig.clientId }, }); - await sendSqsEvent(sqsClient, callbackEventQueueUrl, messageStatusEvent); - await ensureInboundQueueIsEmpty(sqsClient, callbackEventQueueUrl); + await sendSqsEvent(ctx.sqs, ctx.inboundQueueUrl, event); + await ensureInboundQueueIsEmpty(ctx.sqs, ctx.inboundQueueUrl); - const callbacks = await awaitSignedCallbacksByCountFromWebhookLogGroup( - cloudWatchClient, - webhookLogGroupName, - messageStatusEvent.data.messageId, + const callbackMap = await awaitCallbacks( + ctx.cwLogs, + ctx.webhookLogGroup, + [event.data.messageId], "MessageStatus", expectedPaths.length, - startTime, + ctx.startTime, ); - expect(callbacks).toHaveLength(expectedPaths.length); + const callbacks = callbackMap.get(event.data.messageId)!; - const actualPaths = callbacks - .map((callback) => callback.path) - .toSorted(compareStrings); - expect(actualPaths).toEqual(expectedPaths.toSorted(compareStrings)); + const paths = callbacks.map((cb) => cb.path); + expect(paths).toHaveLength(expectedPaths.length); + 
expect(paths).toEqual(expect.arrayContaining(expectedPaths)); for (const callback of callbacks) { expect(callback.payload).toMatchObject({ type: "MessageStatus", attributes: expect.objectContaining({ - messageId: messageStatusEvent.data.messageId, + messageId: event.data.messageId, messageStatus: "delivered", }), }); - assertCallbackHeaders( callback, - client2Config.apiKeyVar, - client2Config.applicationIdVar, + fanOutConfig.apiKeyVar, + fanOutConfig.applicationIdVar, ); } }, 120_000); @@ -173,32 +124,30 @@ describe("SQS to Webhook Integration", () => { describe("Channel Status Event Flow", () => { it("should process channel status event from SQS to webhook", async () => { - const channelStatusEvent: StatusPublishEvent = + const event: StatusPublishEvent = createChannelStatusPublishEvent(); - const callbacks = await processChannelStatusEvent( - sqsClient, - cloudWatchClient, - callbackEventQueueUrl, - webhookLogGroupName, - channelStatusEvent, - webhookTargetPath, - startTime, - ); + await sendSqsEvent(ctx.sqs, ctx.inboundQueueUrl, event); + await ensureInboundQueueIsEmpty(ctx.sqs, ctx.inboundQueueUrl); - expect(callbacks).toHaveLength(1); + const callback = await awaitCallback( + ctx.cwLogs, + ctx.webhookLogGroup, + event.data.messageId, + "ChannelStatus", + ctx.startTime, + ); - expect(callbacks[0].payload).toMatchObject({ + expect(callback.payload).toMatchObject({ type: "ChannelStatus", attributes: expect.objectContaining({ channel: "nhsapp", channelStatus: "delivered", supplierStatus: "delivered", - messageId: channelStatusEvent.data.messageId, + messageId: event.data.messageId, }), }); - - assertCallbackHeaders(callbacks[0]); + assertCallbackHeaders(callback); }, 120_000); }); @@ -206,14 +155,12 @@ describe("SQS to Webhook Integration", () => { it("should route a non-retriable (4xx) webhook response to the per-client DLQ", async () => { const event: StatusPublishEvent = createMessageStatusPublishEvent({ - data: { - messageId: `force-400-${Date.now()}`, - }, + 
data: { messageId: `force-400-${crypto.randomUUID()}` }, }); - await sendSqsEvent(sqsClient, callbackEventQueueUrl, event); + await sendSqsEvent(ctx.sqs, ctx.inboundQueueUrl, event); - const dlqMessage = await awaitQueueMessage(sqsClient, clientDlqQueueUrl); + const dlqMessage = await awaitQueueMessage(ctx.sqs, clientDlqUrl); expect(dlqMessage.Body).toBeDefined(); expect(dlqMessage.MessageAttributes?.ERROR_CODE?.StringValue).toBe( @@ -223,36 +170,27 @@ describe("SQS to Webhook Integration", () => { dlqMessage.MessageAttributes?.ERROR_MESSAGE?.StringValue, ).toContain("Forced status 400"); - await sqsClient.send( - new DeleteMessageCommand({ - QueueUrl: clientDlqQueueUrl, - ReceiptHandle: dlqMessage.ReceiptHandle!, - }), - ); + await deleteMessage(ctx.sqs, clientDlqUrl, dlqMessage); }, 120_000); }); describe("Inbound Event DLQ", () => { it("should move an invalid inbound event to the inbound-event DLQ when schema validation fails", async () => { - const messageId = `invalid-schema-${Date.now()}`; + const messageId = `invalid-schema-${crypto.randomUUID()}`; const invalidEvent = createMessageStatusPublishEvent({ data: { messageId, - channels: [ - // @ts-expect-error - intentionally invalid for schema-failure DLQ path - { - channelStatus: "DELIVERED", - }, - ], + // @ts-expect-error - intentionally invalid for schema-failure DLQ path + channels: [{ channelStatus: "DELIVERED" }], }, }); - await sendSqsEvent(sqsClient, callbackEventQueueUrl, invalidEvent); - await ensureInboundQueueIsEmpty(sqsClient, callbackEventQueueUrl); + await sendSqsEvent(ctx.sqs, ctx.inboundQueueUrl, invalidEvent); + await ensureInboundQueueIsEmpty(ctx.sqs, ctx.inboundQueueUrl); const dlqMessage = await awaitQueueMessageByMessageId( - sqsClient, - inboundEventDlqQueueUrl, + ctx.sqs, + ctx.inboundDlqUrl, messageId, ); @@ -260,11 +198,43 @@ describe("SQS to Webhook Integration", () => { const dlqPayload = JSON.parse(dlqMessage.Body as string); expect(dlqPayload.data.messageId).toBe(messageId); - 
await sqsClient.send( - new DeleteMessageCommand({ - QueueUrl: inboundEventDlqQueueUrl, - ReceiptHandle: dlqMessage.ReceiptHandle!, + await deleteMessage(ctx.sqs, ctx.inboundDlqUrl, dlqMessage); + }, 120_000); + }); + + describe("mTLS Delivery", () => { + it("should deliver a callback via mTLS to the mTLS-secured mock webhook", async () => { + const mtlsConfig = getClientConfig("clientMtls"); + + const event: StatusPublishEvent = + createMessageStatusPublishEvent({ + data: { clientId: mtlsConfig.clientId }, + }); + + await sendSqsEvent(ctx.sqs, ctx.inboundQueueUrl, event); + await ensureInboundQueueIsEmpty(ctx.sqs, ctx.inboundQueueUrl); + + const callback = await awaitCallback( + ctx.cwLogs, + ctx.webhookLogGroup, + event.data.messageId, + "MessageStatus", + ctx.startTime, + ); + + expect(callback.path).toBe(buildMockWebhookTargetPath("clientMtls")); + expect(callback.isMtls).toBe(true); + expect(callback.payload).toMatchObject({ + type: "MessageStatus", + attributes: expect.objectContaining({ + messageId: event.data.messageId, + messageStatus: "delivered", }), + }); + assertCallbackHeaders( + callback, + mtlsConfig.apiKeyVar, + mtlsConfig.applicationIdVar, ); }, 120_000); }); diff --git a/tests/integration/metrics.test.ts b/tests/integration/metrics.test.ts index f40eba69..20e1dfb8 100644 --- a/tests/integration/metrics.test.ts +++ b/tests/integration/metrics.test.ts @@ -1,77 +1,50 @@ -import { DeleteMessageCommand, SQSClient } from "@aws-sdk/client-sqs"; -import { CloudWatchLogsClient } from "@aws-sdk/client-cloudwatch-logs"; import type { MessageStatusData, StatusPublishEvent, } from "@nhs-notify-client-callbacks/models"; +import { awaitCallback, awaitEmfMetrics } from "./helpers/cloudwatch"; +import { createMessageStatusPublishEvent } from "./helpers/event-factories"; +import { getClientConfig } from "./helpers/mock-client-config"; import { - buildInboundEventDlqQueueUrl, - buildInboundEventQueueUrl, - buildLambdaLogGroupName, - createCloudWatchLogsClient, - 
createSqsClient, - getDeploymentDetails, -} from "@nhs-notify-client-callbacks/test-support/helpers"; -import { + awaitQueueMessage, awaitQueueMessageByMessageId, - buildMockClientDlqQueueUrl, + deleteMessage, ensureInboundQueueIsEmpty, purgeQueues, sendSqsEvent, } from "./helpers/sqs"; import { - buildMockWebhookTargetPath, - getMockItClientConfig, -} from "./helpers/mock-client-config"; -import { - awaitAllEmfMetricsInLogGroup, - awaitSignedCallbacksFromWebhookLogGroup, -} from "./helpers/cloudwatch"; -import { createMessageStatusPublishEvent } from "./helpers/event-factories"; + type TestContext, + createTestContext, + destroyTestContext, +} from "./helpers/test-context"; describe("Metrics", () => { - let sqsClient: SQSClient; - let cloudWatchClient: CloudWatchLogsClient; - let callbackEventQueueUrl: string; - let clientDlqQueueUrl: string; - let inboundEventDlqQueueUrl: string; - let logGroupName: string; - let webhookLogGroupName: string; + let ctx: TestContext; + let clientDlqUrl: string; + let transformFilterLogGroup: string; beforeAll(async () => { - const deploymentDetails = getDeploymentDetails(); - const { clientId } = getMockItClientConfig(); - - sqsClient = createSqsClient(deploymentDetails); - cloudWatchClient = createCloudWatchLogsClient(deploymentDetails); - callbackEventQueueUrl = buildInboundEventQueueUrl(deploymentDetails); - clientDlqQueueUrl = buildMockClientDlqQueueUrl(deploymentDetails, clientId); - inboundEventDlqQueueUrl = buildInboundEventDlqQueueUrl(deploymentDetails); - logGroupName = buildLambdaLogGroupName( - deploymentDetails, - "client-transform-filter", - ); - webhookLogGroupName = buildLambdaLogGroupName( - deploymentDetails, - "mock-webhook", - ); - - await purgeQueues(sqsClient, [ - inboundEventDlqQueueUrl, - clientDlqQueueUrl, - callbackEventQueueUrl, + ctx = createTestContext(); + const { clientId } = getClientConfig("clientSingleTarget"); + + clientDlqUrl = ctx.clientDlqUrl(clientId); + transformFilterLogGroup = 
ctx.logGroup("client-transform-filter"); + + await purgeQueues(ctx.sqs, [ + ctx.inboundDlqUrl, + clientDlqUrl, + ctx.inboundQueueUrl, ]); }); afterAll(async () => { - await purgeQueues(sqsClient, [ - inboundEventDlqQueueUrl, - clientDlqQueueUrl, - callbackEventQueueUrl, + await purgeQueues(ctx.sqs, [ + ctx.inboundDlqUrl, + clientDlqUrl, + ctx.inboundQueueUrl, ]); - - sqsClient.destroy(); - cloudWatchClient.destroy(); + destroyTestContext(ctx); }); describe("Successful event processing", () => { @@ -79,40 +52,38 @@ describe("Metrics", () => { const startTime = Date.now(); const event = createMessageStatusPublishEvent(); - await sendSqsEvent(sqsClient, callbackEventQueueUrl, event); - await ensureInboundQueueIsEmpty(sqsClient, callbackEventQueueUrl); + await sendSqsEvent(ctx.sqs, ctx.inboundQueueUrl, event); + await ensureInboundQueueIsEmpty(ctx.sqs, ctx.inboundQueueUrl); - // Wait for signed callback log to confirm the invocation completed before checking metrics - const callbacks = await awaitSignedCallbacksFromWebhookLogGroup( - cloudWatchClient, - webhookLogGroupName, + await awaitCallback( + ctx.cwLogs, + ctx.webhookLogGroup, event.data.messageId, "MessageStatus", startTime, - buildMockWebhookTargetPath(), ); - expect(callbacks.length).toBeGreaterThan(0); - - await awaitAllEmfMetricsInLogGroup( - cloudWatchClient, - logGroupName, - [ - "EventsReceived", - "TransformationsSuccessful", - "FilteringStarted", - "FilteringMatched", - "CallbacksInitiated", - ], - startTime, - ); + await expect( + awaitEmfMetrics( + ctx.cwLogs, + transformFilterLogGroup, + [ + "EventsReceived", + "TransformationsSuccessful", + "FilteringStarted", + "FilteringMatched", + "CallbacksInitiated", + ], + startTime, + ), + ).resolves.toBeUndefined(); }, 120_000); }); describe("Validation error", () => { it("should emit ValidationErrors metric when an invalid event fails schema validation", async () => { const startTime = Date.now(); - const messageId = `invalid-schema-metrics-${Date.now()}`; 
+ const messageId = `invalid-schema-metrics-${crypto.randomUUID()}`; const invalidEvent: StatusPublishEvent = createMessageStatusPublishEvent({ data: { @@ -122,30 +93,83 @@ describe("Metrics", () => { }, }); - await sendSqsEvent(sqsClient, callbackEventQueueUrl, invalidEvent); + await sendSqsEvent(ctx.sqs, ctx.inboundQueueUrl, invalidEvent); - // Wait for the event to land on the DLQ, confirming the Lambda ran and failed validation const dlqMessage = await awaitQueueMessageByMessageId( - sqsClient, - inboundEventDlqQueueUrl, + ctx.sqs, + ctx.inboundDlqUrl, messageId, ); expect(dlqMessage.Body).toBeDefined(); + await deleteMessage(ctx.sqs, ctx.inboundDlqUrl, dlqMessage); - await sqsClient.send( - new DeleteMessageCommand({ - QueueUrl: inboundEventDlqQueueUrl, - ReceiptHandle: dlqMessage.ReceiptHandle!, - }), + await awaitEmfMetrics( + ctx.cwLogs, + transformFilterLogGroup, + ["EventsReceived", "ValidationErrors"], + startTime, ); + }, 120_000); + }); - await awaitAllEmfMetricsInLogGroup( - cloudWatchClient, - logGroupName, - ["EventsReceived", "ValidationErrors"], + describe("HTTPS Client Lambda metrics", () => { + let httpsClientLogGroup: string; + + beforeAll(() => { + const { clientId } = getClientConfig("clientSingleTarget"); + httpsClientLogGroup = ctx.logGroup(`https-client-${clientId}`); + }); + + it("should emit DeliveryAttempt, DeliverySuccess and DeliveryDurationMs on successful delivery", async () => { + const startTime = Date.now(); + const event = createMessageStatusPublishEvent(); + + await sendSqsEvent(ctx.sqs, ctx.inboundQueueUrl, event); + await ensureInboundQueueIsEmpty(ctx.sqs, ctx.inboundQueueUrl); + + await awaitCallback( + ctx.cwLogs, + ctx.webhookLogGroup, + event.data.messageId, + "MessageStatus", startTime, ); + + await expect( + awaitEmfMetrics( + ctx.cwLogs, + httpsClientLogGroup, + ["DeliveryAttempt", "DeliverySuccess", "DeliveryDurationMs"], + startTime, + ), + ).resolves.toBeUndefined(); + }, 120_000); + + it("should emit 
DeliveryAttempt, DeliveryPermanentFailure and DeliveryDurationMs on 4xx response", async () => { + const startTime = Date.now(); + const messageId = `force-400-metrics-${crypto.randomUUID()}`; + + const event: StatusPublishEvent = + createMessageStatusPublishEvent({ + data: { messageId }, + }); + + await sendSqsEvent(ctx.sqs, ctx.inboundQueueUrl, event); + + const dlqMessage = await awaitQueueMessage(ctx.sqs, clientDlqUrl, 90_000); + + expect(dlqMessage.Body).toBeDefined(); + await deleteMessage(ctx.sqs, clientDlqUrl, dlqMessage); + + await expect( + awaitEmfMetrics( + ctx.cwLogs, + httpsClientLogGroup, + ["DeliveryAttempt", "DeliveryPermanentFailure", "DeliveryDurationMs"], + startTime, + ), + ).resolves.toBeUndefined(); }, 120_000); }); }); From 39d70f3efadd6cbfc774aaa3e70b3824c0b6f899 Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Mon, 27 Apr 2026 16:18:29 +0100 Subject: [PATCH 28/65] Fix DLQ on delivery --- .../modules/client-delivery/module_sqs_per_client.tf | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/infrastructure/terraform/modules/client-delivery/module_sqs_per_client.tf b/infrastructure/terraform/modules/client-delivery/module_sqs_per_client.tf index 5811f8b6..0fad559b 100644 --- a/infrastructure/terraform/modules/client-delivery/module_sqs_per_client.tf +++ b/infrastructure/terraform/modules/client-delivery/module_sqs_per_client.tf @@ -18,6 +18,14 @@ module "sqs_delivery" { sqs_policy_overload = data.aws_iam_policy_document.sqs_delivery.json } +resource "aws_sqs_queue_redrive_policy" "delivery" { + queue_url = module.sqs_delivery.sqs_queue_url + redrive_policy = jsonencode({ + deadLetterTargetArn = module.dlq_delivery.sqs_queue_arn + maxReceiveCount = var.sqs_max_receive_count + }) +} + data "aws_iam_policy_document" "sqs_delivery" { statement { sid = "AllowEventBridgeToSendMessage" From 40930221c2457b8bf3c8b75f337cd678ad1e5ac5 Mon Sep 17 00:00:00 2001 From: rhyscoxnhs Date: Wed, 29 Apr 2026 15:40:43 +0100 Subject: [PATCH 29/65] CCM-16073 - 
Updated rate limiting behaviour (#158) * CCM-16073 - Updated rate limiting behaviour Co-authored-by: Mike Wild Co-authored-by: Tim Marston --- .../terraform/components/callbacks/README.md | 1 + .../callbacks/module_client_delivery.tf | 2 + .../callbacks/module_perf_runner_lambda.tf | 67 +- .../components/callbacks/variables.tf | 6 + .../modules/client-delivery/README.md | 1 + .../module_https_client_lambda.tf | 1 + .../modules/client-delivery/variables.tf | 6 + .../src/__tests__/index.component.test.ts | 1 + .../src/__tests__/services/metrics.test.ts | 19 +- .../src/services/metrics.ts | 55 +- .../src/__tests__/admit-lua.test.ts | 683 +++++++++++------- .../src/__tests__/delivery-metrics.test.ts | 11 + .../__tests__/delivery-observability.test.ts | 15 +- .../src/__tests__/endpoint-gate.test.ts | 179 +++-- .../src/__tests__/handler.test.ts | 313 ++++++-- .../src/__tests__/record-result-lua.test.ts | 541 ++++++++------ lambdas/https-client-lambda/src/handler.ts | 413 +++++++---- .../src/services/admit.lua | 243 ++----- .../src/services/delivery-metrics.ts | 67 +- .../src/services/delivery-observability.ts | 17 +- .../src/services/endpoint-gate.ts | 81 ++- .../src/services/record-result.lua | 269 +++---- lambdas/perf-runner-lambda/package.json | 6 +- .../src/__tests__/cloudwatch.test.ts | 377 +++++++++- .../src/__tests__/elasticache.test.ts | 165 +++++ .../src/__tests__/index.test.ts | 48 ++ .../src/__tests__/purge.test.ts | 116 +++ .../src/__tests__/runner.test.ts | 375 +++++++++- .../src/__tests__/webhook-verify.test.ts | 173 +++++ lambdas/perf-runner-lambda/src/cloudwatch.ts | 120 ++- lambdas/perf-runner-lambda/src/elasticache.ts | 110 +++ lambdas/perf-runner-lambda/src/index.ts | 23 +- lambdas/perf-runner-lambda/src/purge.ts | 40 + lambdas/perf-runner-lambda/src/runner.ts | 147 +++- lambdas/perf-runner-lambda/src/types.ts | 55 ++ .../perf-runner-lambda/src/webhook-verify.ts | 59 ++ pnpm-lock.yaml | 18 +- scripts/tests/integration-debug.sh | 36 +- 
tests/integration/delivery-resilience.test.ts | 20 +- 39 files changed, 3719 insertions(+), 1160 deletions(-) create mode 100644 lambdas/perf-runner-lambda/src/__tests__/elasticache.test.ts create mode 100644 lambdas/perf-runner-lambda/src/__tests__/purge.test.ts create mode 100644 lambdas/perf-runner-lambda/src/__tests__/webhook-verify.test.ts create mode 100644 lambdas/perf-runner-lambda/src/elasticache.ts create mode 100644 lambdas/perf-runner-lambda/src/purge.ts create mode 100644 lambdas/perf-runner-lambda/src/webhook-verify.ts diff --git a/infrastructure/terraform/components/callbacks/README.md b/infrastructure/terraform/components/callbacks/README.md index 02804698..e090abb9 100644 --- a/infrastructure/terraform/components/callbacks/README.md +++ b/infrastructure/terraform/components/callbacks/README.md @@ -45,6 +45,7 @@ | [s3\_enable\_force\_destroy](#input\_s3\_enable\_force\_destroy) | Whether to enable force destroy for the S3 buckets created in this module | `bool` | `false` | no | | [sqs\_inbound\_event\_max\_receive\_count](#input\_sqs\_inbound\_event\_max\_receive\_count) | n/a | `number` | `3` | no | | [sqs\_inbound\_event\_visibility\_timeout\_seconds](#input\_sqs\_inbound\_event\_visibility\_timeout\_seconds) | n/a | `number` | `60` | no | +| [token\_bucket\_burst\_capacity](#input\_token\_bucket\_burst\_capacity) | Token bucket burst capacity used by the rate limiter | `number` | `2250` | no | ## Modules | Name | Source | Version | diff --git a/infrastructure/terraform/components/callbacks/module_client_delivery.tf b/infrastructure/terraform/components/callbacks/module_client_delivery.tf index ebc2e9e1..5122606e 100644 --- a/infrastructure/terraform/components/callbacks/module_client_delivery.tf +++ b/infrastructure/terraform/components/callbacks/module_client_delivery.tf @@ -41,6 +41,8 @@ module "client_delivery" { mtls_test_cert_s3_key = local.mtls_test_cert_s3_key # gitleaks:allow mtls_test_ca_s3_key = local.mtls_test_ca_s3_key # 
gitleaks:allow + token_bucket_burst_capacity = var.token_bucket_burst_capacity + vpc_subnet_ids = try(local.acct.private_subnets[local.bc_name], []) lambda_security_group_id = aws_security_group.https_client_lambda.id } diff --git a/infrastructure/terraform/components/callbacks/module_perf_runner_lambda.tf b/infrastructure/terraform/components/callbacks/module_perf_runner_lambda.tf index 424294a8..7a77c40c 100644 --- a/infrastructure/terraform/components/callbacks/module_perf_runner_lambda.tf +++ b/infrastructure/terraform/components/callbacks/module_perf_runner_lambda.tf @@ -40,6 +40,15 @@ module "perf_runner_lambda" { INBOUND_QUEUE_URL = module.sqs_inbound_event.sqs_queue_url TRANSFORM_FILTER_LOG_GROUP = module.client_transform_filter_lambda.cloudwatch_log_group_name DELIVERY_LOG_GROUP_PREFIX = "/aws/lambda/${local.csi}-https-client-" + MOCK_WEBHOOK_LOG_GROUP = var.deploy_mock_clients ? module.mock_webhook_lambda[0].cloudwatch_log_group_name : "" + ELASTICACHE_ENDPOINT = aws_elasticache_serverless_cache.delivery_state.endpoint[0].address + ELASTICACHE_CACHE_NAME = aws_elasticache_serverless_cache.delivery_state.name + ELASTICACHE_IAM_USERNAME = "${var.project}-${var.environment}-${var.component}-elasticache-user" + } + + vpc_config = { + subnet_ids = try(local.acct.private_subnets[local.bc_name], []) + security_group_ids = [aws_security_group.https_client_lambda.id] } } @@ -74,6 +83,22 @@ data "aws_iam_policy_document" "perf_runner_lambda" { ] } + statement { + sid = "SQSPurgeQueue" + effect = "Allow" + + actions = [ + "sqs:PurgeQueue", + ] + + resources = [ + module.sqs_inbound_event.sqs_queue_arn, + "${module.sqs_inbound_event.sqs_queue_arn}-dlq", + "arn:aws:sqs:${var.region}:${var.aws_account_id}:${local.csi}-*-delivery-queue", + "arn:aws:sqs:${var.region}:${var.aws_account_id}:${local.csi}-*-delivery-dlq-queue", + ] + } + statement { sid = "CloudWatchLogsInsightsQuery" effect = "Allow" @@ -83,10 +108,15 @@ data "aws_iam_policy_document" "perf_runner_lambda" { 
"logs:StopQuery", ] - resources = [ - "arn:aws:logs:${var.region}:${var.aws_account_id}:log-group:${module.client_transform_filter_lambda.cloudwatch_log_group_name}:*", - "arn:aws:logs:${var.region}:${var.aws_account_id}:log-group:/aws/lambda/${local.csi}-https-client-*", - ] + resources = concat( + [ + "arn:aws:logs:${var.region}:${var.aws_account_id}:log-group:${module.client_transform_filter_lambda.cloudwatch_log_group_name}:*", + "arn:aws:logs:${var.region}:${var.aws_account_id}:log-group:/aws/lambda/${local.csi}-https-client-*", + ], + var.deploy_mock_clients ? [ + "arn:aws:logs:${var.region}:${var.aws_account_id}:log-group:${module.mock_webhook_lambda[0].cloudwatch_log_group_name}:*", + ] : [], + ) } statement { @@ -99,4 +129,33 @@ data "aws_iam_policy_document" "perf_runner_lambda" { resources = ["*"] } + + statement { + sid = "ElastiCacheConnect" + effect = "Allow" + + actions = [ + "elasticache:Connect", + ] + + resources = [ + aws_elasticache_serverless_cache.delivery_state.arn, + aws_elasticache_user.delivery_state_iam.arn, + ] + } + + statement { + sid = "VPCNetworkInterfacePermissions" + effect = "Allow" + + actions = [ + "ec2:CreateNetworkInterface", + "ec2:DeleteNetworkInterface", + "ec2:DescribeNetworkInterfaces", + ] + + resources = [ + "*", + ] + } } diff --git a/infrastructure/terraform/components/callbacks/variables.tf b/infrastructure/terraform/components/callbacks/variables.tf index 9c71492d..aef32373 100644 --- a/infrastructure/terraform/components/callbacks/variables.tf +++ b/infrastructure/terraform/components/callbacks/variables.tf @@ -195,3 +195,9 @@ variable "elasticache_data_storage_maximum_gb" { description = "Maximum data storage in GB for the ElastiCache Serverless delivery state cache" default = 1 } + +variable "token_bucket_burst_capacity" { + type = number + description = "Token bucket burst capacity used by the rate limiter" + default = 2250 +} diff --git a/infrastructure/terraform/modules/client-delivery/README.md 
b/infrastructure/terraform/modules/client-delivery/README.md index 0a4965e7..2036c60d 100644 --- a/infrastructure/terraform/modules/client-delivery/README.md +++ b/infrastructure/terraform/modules/client-delivery/README.md @@ -45,6 +45,7 @@ No requirements. | [sqs\_visibility\_timeout\_seconds](#input\_sqs\_visibility\_timeout\_seconds) | Visibility timeout for the per-client delivery queue | `number` | `60` | no | | [subscription\_targets](#input\_subscription\_targets) | Flattened subscription-target fanout map keyed by subscription-target composite key |
map(object({
subscription_id = string
target_id = string
}))
| n/a | yes | | [subscriptions](#input\_subscriptions) | Subscription definitions for this client, keyed by subscription\_id |
map(object({
subscription_id = string
target_ids = list(string)
}))
| n/a | yes | +| [token\_bucket\_burst\_capacity](#input\_token\_bucket\_burst\_capacity) | Token bucket burst capacity used by the rate limiter | `number` | `2250` | no | | [vpc\_subnet\_ids](#input\_vpc\_subnet\_ids) | VPC subnet IDs for Lambda execution | `list(string)` | `[]` | no | ## Modules diff --git a/infrastructure/terraform/modules/client-delivery/module_https_client_lambda.tf b/infrastructure/terraform/modules/client-delivery/module_https_client_lambda.tf index 1260d471..0021fb80 100644 --- a/infrastructure/terraform/modules/client-delivery/module_https_client_lambda.tf +++ b/infrastructure/terraform/modules/client-delivery/module_https_client_lambda.tf @@ -53,6 +53,7 @@ module "https_client_lambda" { MTLS_TEST_CERT_S3_BUCKET = var.mtls_test_cert_s3_bucket MTLS_TEST_CERT_S3_KEY = var.mtls_test_cert_s3_key # gitleaks:allow QUEUE_URL = module.sqs_delivery.sqs_queue_url + TOKEN_BUCKET_BURST_CAPACITY = tostring(var.token_bucket_burst_capacity) } vpc_config = var.lambda_security_group_id != "" ? 
{ diff --git a/infrastructure/terraform/modules/client-delivery/variables.tf b/infrastructure/terraform/modules/client-delivery/variables.tf index 643e163e..801ca291 100644 --- a/infrastructure/terraform/modules/client-delivery/variables.tf +++ b/infrastructure/terraform/modules/client-delivery/variables.tf @@ -181,6 +181,12 @@ variable "mtls_test_ca_s3_key" { default = "" } +variable "token_bucket_burst_capacity" { + type = number + description = "Token bucket burst capacity used by the rate limiter" + default = 2250 +} + variable "elasticache_endpoint" { type = string description = "ElastiCache Serverless endpoint URL" diff --git a/lambdas/client-transform-filter-lambda/src/__tests__/index.component.test.ts b/lambdas/client-transform-filter-lambda/src/__tests__/index.component.test.ts index b234a244..8198cdfb 100644 --- a/lambdas/client-transform-filter-lambda/src/__tests__/index.component.test.ts +++ b/lambdas/client-transform-filter-lambda/src/__tests__/index.component.test.ts @@ -33,6 +33,7 @@ jest.mock("aws-embedded-metrics", () => ({ Count: "Count", Milliseconds: "Milliseconds", }, + StorageResolution: { High: 1, Standard: 60 }, })); import { GetObjectCommand, NoSuchKey } from "@aws-sdk/client-s3"; diff --git a/lambdas/client-transform-filter-lambda/src/__tests__/services/metrics.test.ts b/lambdas/client-transform-filter-lambda/src/__tests__/services/metrics.test.ts index bdbcc3aa..3e1a419e 100644 --- a/lambdas/client-transform-filter-lambda/src/__tests__/services/metrics.test.ts +++ b/lambdas/client-transform-filter-lambda/src/__tests__/services/metrics.test.ts @@ -1,7 +1,15 @@ -import { Unit, createMetricsLogger } from "aws-embedded-metrics"; +import { + StorageResolution, + Unit, + createMetricsLogger, +} from "aws-embedded-metrics"; import { CallbackMetrics, createMetricLogger } from "services/metrics"; -jest.mock("aws-embedded-metrics"); +jest.mock("aws-embedded-metrics", () => ({ + Unit: { Count: "Count" }, + StorageResolution: { High: 1, Standard: 60 
}, + createMetricsLogger: jest.fn(), +})); const mockPutMetric = jest.fn(); const mockSetDimensions = jest.fn(); @@ -80,6 +88,7 @@ describe("CallbackMetrics", () => { "EventsReceived", 1, Unit.Count, + StorageResolution.High, ); }); }); @@ -92,6 +101,7 @@ describe("CallbackMetrics", () => { "TransformationsSuccessful", 1, Unit.Count, + StorageResolution.High, ); }); }); @@ -104,6 +114,7 @@ describe("CallbackMetrics", () => { "TransformationsFailed", 1, Unit.Count, + StorageResolution.High, ); }); }); @@ -116,6 +127,7 @@ describe("CallbackMetrics", () => { "CallbacksInitiated", 1, Unit.Count, + StorageResolution.High, ); }); }); @@ -128,6 +140,7 @@ describe("CallbackMetrics", () => { "ValidationErrors", 1, Unit.Count, + StorageResolution.High, ); }); }); @@ -140,6 +153,7 @@ describe("CallbackMetrics", () => { "FilteringStarted", 1, Unit.Count, + StorageResolution.High, ); }); }); @@ -152,6 +166,7 @@ describe("CallbackMetrics", () => { "FilteringMatched", 1, Unit.Count, + StorageResolution.High, ); }); }); diff --git a/lambdas/client-transform-filter-lambda/src/services/metrics.ts b/lambdas/client-transform-filter-lambda/src/services/metrics.ts index 398c5ecc..fd98ec02 100644 --- a/lambdas/client-transform-filter-lambda/src/services/metrics.ts +++ b/lambdas/client-transform-filter-lambda/src/services/metrics.ts @@ -1,4 +1,8 @@ -import { Unit, createMetricsLogger } from "aws-embedded-metrics"; +import { + StorageResolution, + Unit, + createMetricsLogger, +} from "aws-embedded-metrics"; import type { MetricsLogger } from "aws-embedded-metrics"; export const createMetricLogger = (): MetricsLogger => { @@ -21,30 +25,65 @@ export class CallbackMetrics { constructor(private readonly metrics: MetricsLogger) {} emitEventReceived(): void { - this.metrics.putMetric("EventsReceived", 1, Unit.Count); + this.metrics.putMetric( + "EventsReceived", + 1, + Unit.Count, + StorageResolution.High, + ); } emitTransformationSuccess(): void { - 
this.metrics.putMetric("TransformationsSuccessful", 1, Unit.Count); + this.metrics.putMetric( + "TransformationsSuccessful", + 1, + Unit.Count, + StorageResolution.High, + ); } emitTransformationFailure(): void { - this.metrics.putMetric("TransformationsFailed", 1, Unit.Count); + this.metrics.putMetric( + "TransformationsFailed", + 1, + Unit.Count, + StorageResolution.High, + ); } emitDeliveryInitiated(): void { - this.metrics.putMetric("CallbacksInitiated", 1, Unit.Count); + this.metrics.putMetric( + "CallbacksInitiated", + 1, + Unit.Count, + StorageResolution.High, + ); } emitValidationError(): void { - this.metrics.putMetric("ValidationErrors", 1, Unit.Count); + this.metrics.putMetric( + "ValidationErrors", + 1, + Unit.Count, + StorageResolution.High, + ); } emitFilteringStarted(): void { - this.metrics.putMetric("FilteringStarted", 1, Unit.Count); + this.metrics.putMetric( + "FilteringStarted", + 1, + Unit.Count, + StorageResolution.High, + ); } emitFilteringMatched(): void { - this.metrics.putMetric("FilteringMatched", 1, Unit.Count); + this.metrics.putMetric( + "FilteringMatched", + 1, + Unit.Count, + StorageResolution.High, + ); } } diff --git a/lambdas/https-client-lambda/src/__tests__/admit-lua.test.ts b/lambdas/https-client-lambda/src/__tests__/admit-lua.test.ts index 6aab4727..7553a1b1 100644 --- a/lambdas/https-client-lambda/src/__tests__/admit-lua.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/admit-lua.test.ts @@ -1,32 +1,34 @@ import admitLuaSrc from "services/admit.lua"; import { createRedisStore, evalLua } from "__tests__/helpers/lua-redis-mock"; -// ARGV: [now, capacity, refillPerSec, cooldownMs, decayPeriodMs, cbWindowPeriodMs, cbProbeIntervalMs] -// KEYS: [cbKey, rlKey] -// Returns: [allowed (0|1), reason, retryAfterMs, effectiveRate] +// ARGV: [now, capacity, targetRateLimit, cooldownMs, recoveryPeriodMs, probeRateLimit, targetBatchSize, cbEnabled] +// KEYS: [epKey] +// Returns: [consumedTokens, reason, retryAfterMs, effectiveRate] 
type AdmitArgs = { now: number; capacity: number; - refillPerSec: number; + targetRateLimit: number; cooldownMs: number; - decayPeriodMs: number; - cbWindowPeriodMs: number; - cbProbeIntervalMs: number; + recoveryPeriodMs: number; + probeRateLimit: number; + targetBatchSize: number; + cbEnabled: boolean; }; const defaultArgs: AdmitArgs = { now: 1_000_000, - capacity: 10, - refillPerSec: 10, - cooldownMs: 60_000, - decayPeriodMs: 300_000, - cbWindowPeriodMs: 60_000, - cbProbeIntervalMs: 60_000, + capacity: 2250, + targetRateLimit: 10, + cooldownMs: 120_000, + recoveryPeriodMs: 600_000, + probeRateLimit: 1 / 60, + targetBatchSize: 1, + cbEnabled: true, }; type AdmitResult = { - allowed: number; + consumedTokens: number; reason: string; retryAfterMs: number; effectiveRate: number; @@ -40,20 +42,21 @@ function runAdmit( const merged = { ...defaultArgs, ...args }; const raw = evalLua( admitLuaSrc, - [`cb:${targetId}`, `rl:${targetId}`], + [`ep:${targetId}`], [ merged.now.toString(), merged.capacity.toString(), - merged.refillPerSec.toString(), + merged.targetRateLimit.toString(), merged.cooldownMs.toString(), - merged.decayPeriodMs.toString(), - merged.cbWindowPeriodMs.toString(), - merged.cbProbeIntervalMs.toString(), + merged.recoveryPeriodMs.toString(), + merged.probeRateLimit.toString(), + merged.targetBatchSize.toString(), + merged.cbEnabled ? 
"1" : "0", ], store, ) as [number, string, number, number]; return { - allowed: raw[0], + consumedTokens: raw[0], reason: raw[1], retryAfterMs: raw[2], effectiveRate: raw[3], @@ -62,399 +65,585 @@ function runAdmit( describe("admit.lua", () => { describe("rate limiting", () => { - it("allows the first request with full token bucket", () => { + it("rate-limits on a fresh endpoint with no prior state", () => { const store = createRedisStore(); - const { allowed, effectiveRate, reason, retryAfterMs } = runAdmit(store); + const now = 1_000_000; - expect(allowed).toBe(1); - expect(reason).toBe("allowed"); - expect(retryAfterMs).toBe(0); - expect(effectiveRate).toBe(10); + const { consumedTokens, effectiveRate, reason } = runAdmit(store, { + now, + targetRateLimit: 10, + }); + + expect(consumedTokens).toBe(0); + expect(reason).toBe("rate_limited"); + expect(effectiveRate).toBeCloseTo(1 / 60, 5); }); - it("depletes tokens on consecutive calls and rejects when empty", () => { + it("generates a probe token on the second call after enough elapsed time", () => { const store = createRedisStore(); - for (let i = 0; i < 10; i++) { - const { allowed } = runAdmit(store); - expect(allowed).toBe(1); - } + runAdmit(store, { now: 1_000_000, targetRateLimit: 10 }); - const { allowed, reason } = runAdmit(store); - expect(allowed).toBe(0); - expect(reason).toBe("rate_limited"); + const { consumedTokens, effectiveRate, reason } = runAdmit(store, { + now: 1_060_001, + targetRateLimit: 10, + }); + + expect(effectiveRate).toBeCloseTo(1 / 60, 5); + expect(consumedTokens).toBe(1); + expect(reason).toBe("some_allowed"); }); - it("returns retryAfterMs when rate limited", () => { + it("does not persist circuit state on first contact", () => { const store = createRedisStore(); + const now = 1_000_000; - for (let i = 0; i < 10; i++) { - runAdmit(store); - } + runAdmit(store, { now, targetRateLimit: 10 }); - const { retryAfterMs } = runAdmit(store); - expect(retryAfterMs).toBe(1000); + const epHash 
= store.get("ep:t1")!; + expect(epHash.has("is_open")).toBe(false); + expect(epHash.has("switched_at")).toBe(false); }); - it("reports effective rate when rate limited", () => { + it("allows full rate after record-result closes the circuit", () => { const store = createRedisStore(); + const now = 1_000_000; - for (let i = 0; i < 10; i++) { - runAdmit(store); - } + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["switched_at", now.toString()], + ["bucket_tokens", "0"], + ["bucket_refilled_at", now.toString()], + ]), + ); - const { effectiveRate } = runAdmit(store); - expect(effectiveRate).toBe(10); + const later = now + 60_000; + const { consumedTokens, reason } = runAdmit(store, { + now: later, + targetRateLimit: 10, + recoveryPeriodMs: 600_000, + }); + + expect(consumedTokens).toBeGreaterThanOrEqual(1); + expect(reason).toBe("some_allowed"); }); - it("refills tokens over time", () => { + it("allows a single request when bucket has tokens from refill", () => { const store = createRedisStore(); const now = 1_000_000; + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["bucket_tokens", "0"], + ["bucket_refilled_at", "0"], + ["switched_at", "0"], + ]), + ); - for (let i = 0; i < 10; i++) { - runAdmit(store, { now }); - } - - const denied = runAdmit(store, { now }); - expect(denied.allowed).toBe(0); + const { consumedTokens, reason, retryAfterMs } = runAdmit(store, { + now, + targetRateLimit: 10, + }); - const refilled = runAdmit(store, { now: now + 1000 }); - expect(refilled.allowed).toBe(1); + expect(consumedTokens).toBe(1); + expect(reason).toBe("some_allowed"); + expect(retryAfterMs).toBe(0); }); - it("caps tokens at capacity", () => { + it("consumes up to targetBatchSize tokens", () => { const store = createRedisStore(); const now = 1_000_000; + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["bucket_tokens", "5"], + ["bucket_refilled_at", now.toString()], + ["switched_at", "0"], + ]), + ); - runAdmit(store, { now, capacity: 5, 
refillPerSec: 100 }); + const { consumedTokens } = runAdmit(store, { + now, + targetBatchSize: 3, + }); + expect(consumedTokens).toBe(3); + }); - // Advance 10 seconds — would add 1000 tokens without cap - runAdmit(store, { now: now + 10_000, capacity: 5, refillPerSec: 100 }); + it("consumes all available when batch exceeds available tokens", () => { + const store = createRedisStore(); + const now = 1_000_000; + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["bucket_tokens", "2"], + ["bucket_refilled_at", now.toString()], + ["switched_at", "0"], + ]), + ); - const rlHash = store.get("rl:t1")!; - // Refill capped to capacity (5), then one consumed → 4 - expect(Number(rlHash.get("tokens"))).toBe(4); + const { consumedTokens } = runAdmit(store, { + now, + targetBatchSize: 5, + }); + expect(consumedTokens).toBe(2); }); - it("handles zero refill rate", () => { + it("returns rate_limited when no tokens available", () => { const store = createRedisStore(); + const now = 1_000_000; + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["bucket_tokens", "0"], + ["bucket_refilled_at", now.toString()], + ["switched_at", "0"], + ]), + ); - for (let i = 0; i < 10; i++) { - runAdmit(store, { refillPerSec: 0 }); - } - - const { allowed, reason, retryAfterMs } = runAdmit(store, { - refillPerSec: 0, - }); - expect(allowed).toBe(0); + const { consumedTokens, reason, retryAfterMs } = runAdmit(store, { now }); + expect(consumedTokens).toBe(0); expect(reason).toBe("rate_limited"); expect(retryAfterMs).toBe(1000); }); - }); - describe("circuit breaker", () => { - it("rejects when circuit is open", () => { + it("refills tokens over time", () => { const store = createRedisStore(); const now = 1_000_000; - const openedUntil = now + 60_000; - store.set( - "cb:t1", + "ep:t1", new Map([ - ["opened_until_ms", openedUntil.toString()], - ["last_probe_ms", now.toString()], + ["is_open", "0"], + ["bucket_tokens", "0"], + ["bucket_refilled_at", now.toString()], + ["switched_at", "0"], 
]), ); - const { allowed, effectiveRate, reason } = runAdmit(store, { now }); - expect(allowed).toBe(0); - expect(reason).toBe("circuit_open"); - expect(effectiveRate).toBe(0); + const { consumedTokens } = runAdmit(store, { + now: now + 1000, + targetRateLimit: 10, + }); + expect(consumedTokens).toBe(1); }); - it("returns retryAfterMs for open circuit", () => { + it("caps tokens at capacity", () => { const store = createRedisStore(); const now = 1_000_000; - const openedUntil = now + 30_000; - store.set( - "cb:t1", + "ep:t1", new Map([ - ["opened_until_ms", openedUntil.toString()], - ["last_probe_ms", now.toString()], + ["is_open", "0"], + ["bucket_tokens", "0"], + ["bucket_refilled_at", "0"], + ["switched_at", "0"], ]), ); - const { retryAfterMs } = runAdmit(store, { now }); - expect(retryAfterMs).toBe(30_000); + const { consumedTokens } = runAdmit(store, { + now, + capacity: 5, + targetRateLimit: 100, + targetBatchSize: 10, + }); + expect(consumedTokens).toBe(5); }); - it("allows probe when probe interval has elapsed", () => { + it("handles zero refill rate", () => { const store = createRedisStore(); const now = 1_000_000; - const openedUntil = now + 120_000; - const lastProbe = now - 61_000; - store.set( - "cb:t1", + "ep:t1", new Map([ - ["opened_until_ms", openedUntil.toString()], - ["last_probe_ms", lastProbe.toString()], + ["is_open", "0"], + ["bucket_tokens", "0"], + ["bucket_refilled_at", now.toString()], + ["switched_at", "0"], ]), ); - const { allowed, effectiveRate, reason, retryAfterMs } = runAdmit(store, { - now, - cbProbeIntervalMs: 60_000, + const { consumedTokens, reason } = runAdmit(store, { + now: now + 10_000, + targetRateLimit: 0, }); - expect(allowed).toBe(1); - expect(reason).toBe("probe"); - expect(retryAfterMs).toBe(0); - expect(effectiveRate).toBe(0); + expect(consumedTokens).toBe(0); + expect(reason).toBe("rate_limited"); }); - it("updates last_probe_ms after allowing a probe", () => { + it("preserves fractional refill time 
(bucketRefilledAt += generationTime, not now)", () => { const store = createRedisStore(); const now = 1_000_000; - const openedUntil = now + 120_000; - const lastProbe = now - 61_000; - store.set( - "cb:t1", + "ep:t1", new Map([ - ["opened_until_ms", openedUntil.toString()], - ["last_probe_ms", lastProbe.toString()], + ["is_open", "0"], + ["bucket_tokens", "0"], + ["bucket_refilled_at", (now - 150).toString()], + ["switched_at", "0"], ]), ); - runAdmit(store, { now, cbProbeIntervalMs: 60_000 }); + runAdmit(store, { now, targetRateLimit: 10 }); - const cbHash = store.get("cb:t1")!; - expect(cbHash.get("last_probe_ms")).toBe(now.toString()); + const epHash = store.get("ep:t1")!; + const refilledAt = Number(epHash.get("bucket_refilled_at")); + // 1 token generated at rate 10/s takes 100ms, so refilledAt = (now-150) + 100 = now - 50 + expect(refilledAt).toBe(now - 50); }); + }); - it("does not probe when interval has not elapsed", () => { + describe("circuit breaker states", () => { + it("blocks completely when circuit is open during cooldown", () => { const store = createRedisStore(); const now = 1_000_000; - const openedUntil = now + 120_000; - const lastProbe = now - 30_000; + const switchedAt = now - 10_000; store.set( - "cb:t1", + "ep:t1", new Map([ - ["opened_until_ms", openedUntil.toString()], - ["last_probe_ms", lastProbe.toString()], + ["is_open", "1"], + ["switched_at", switchedAt.toString()], + ["bucket_tokens", "100"], ]), ); - const { allowed, reason } = runAdmit(store, { + const { consumedTokens, reason } = runAdmit(store, { now, - cbProbeIntervalMs: 60_000, + cooldownMs: 120_000, }); - expect(allowed).toBe(0); + expect(consumedTokens).toBe(0); expect(reason).toBe("circuit_open"); }); - it("does not probe when cbProbeIntervalMs is 0", () => { + it("does not consume bucket tokens when fully open", () => { const store = createRedisStore(); const now = 1_000_000; - const openedUntil = now + 120_000; + const switchedAt = now - 10_000; store.set( - "cb:t1", + 
"ep:t1", new Map([ - ["opened_until_ms", openedUntil.toString()], - ["last_probe_ms", "0"], + ["is_open", "1"], + ["switched_at", switchedAt.toString()], + ["bucket_tokens", "100"], + ["bucket_refilled_at", now.toString()], ]), ); - const { allowed, reason } = runAdmit(store, { - now, - cbProbeIntervalMs: 0, - }); - expect(allowed).toBe(0); - expect(reason).toBe("circuit_open"); + runAdmit(store, { now, cooldownMs: 120_000 }); + + const epHash = store.get("ep:t1")!; + expect(Number(epHash.get("bucket_tokens"))).toBe(100); }); - }); - describe("sliding window", () => { - it("initialises cbWindowFrom on first call", () => { + it("returns retryAfterMs for open circuit", () => { const store = createRedisStore(); const now = 1_000_000; + const switchedAt = now - 10_000; - runAdmit(store, { now }); + store.set( + "ep:t1", + new Map([ + ["is_open", "1"], + ["switched_at", switchedAt.toString()], + ]), + ); - const cbHash = store.get("cb:t1")!; - expect(cbHash.get("cb_window_from")).toBe(now.toString()); + const { retryAfterMs } = runAdmit(store, { now, cooldownMs: 120_000 }); + expect(retryAfterMs).toBe(110_000); }); - it("rolls current window to previous when period expires", () => { + it("uses probeRateLimit when half-open (after cooldown)", () => { const store = createRedisStore(); - const cbWindowPeriodMs = 60_000; - const t0 = 1_000_000; - const t1 = t0 + cbWindowPeriodMs + 1; + const now = 1_000_000; + const switchedAt = now - 130_000; store.set( - "cb:t1", + "ep:t1", new Map([ - ["cb_window_from", t0.toString()], - ["cb_failures", "5"], - ["cb_attempts", "10"], - ["cb_prev_failures", "0"], - ["cb_prev_attempts", "0"], + ["is_open", "1"], + ["switched_at", switchedAt.toString()], + ["bucket_tokens", "0"], + ["bucket_refilled_at", (now - 60_000).toString()], ]), ); - runAdmit(store, { now: t1, cbWindowPeriodMs }); - - const cbHash = store.get("cb:t1")!; - expect(cbHash.get("cb_prev_failures")).toBe("5"); - expect(cbHash.get("cb_prev_attempts")).toBe("10"); - 
expect(cbHash.get("cb_failures")).toBe("0"); - expect(cbHash.get("cb_attempts")).toBe("0"); - expect(cbHash.get("cb_window_from")).toBe(t1.toString()); + const { effectiveRate } = runAdmit(store, { + now, + cooldownMs: 120_000, + probeRateLimit: 1 / 60, + }); + expect(effectiveRate).toBeCloseTo(1 / 60, 5); }); - it("clears both windows when gap exceeds two periods", () => { + it("zeroes residual bucket tokens when circuit is half-open", () => { const store = createRedisStore(); - const cbWindowPeriodMs = 60_000; - const t0 = 1_000_000; - const t1 = t0 + 2 * cbWindowPeriodMs + 1; + const now = 1_000_000; + const switchedAt = now - 130_000; store.set( - "cb:t1", + "ep:t1", new Map([ - ["cb_window_from", t0.toString()], - ["cb_failures", "5"], - ["cb_attempts", "10"], - ["cb_prev_failures", "3"], - ["cb_prev_attempts", "7"], + ["is_open", "1"], + ["switched_at", switchedAt.toString()], + ["bucket_tokens", "100"], + ["bucket_refilled_at", (now - 60_000).toString()], ]), ); - runAdmit(store, { now: t1, cbWindowPeriodMs }); + const { consumedTokens } = runAdmit(store, { + now, + cooldownMs: 120_000, + probeRateLimit: 1 / 60, + }); - const cbHash = store.get("cb:t1")!; - expect(cbHash.get("cb_prev_failures")).toBe("0"); - expect(cbHash.get("cb_prev_attempts")).toBe("0"); - expect(cbHash.get("cb_failures")).toBe("0"); - expect(cbHash.get("cb_attempts")).toBe("0"); - expect(cbHash.get("cb_window_from")).toBe(t1.toString()); + expect(consumedTokens).toBe(1); + const epHash = store.get("ep:t1")!; + expect(Number(epHash.get("bucket_tokens"))).toBe(0); }); - }); - describe("decay scaling", () => { - it("applies reduced rate during decay period", () => { + it("uses recovery ramp when closed during recovery period", () => { const store = createRedisStore(); - const closedAt = 1_000_000; - const decayPeriodMs = 300_000; - const halfwayThrough = closedAt + decayPeriodMs / 2; + const switchedAt = 1_000_000; + const recoveryPeriodMs = 600_000; + const now = switchedAt + 
recoveryPeriodMs / 2; - store.set("cb:t1", new Map([["opened_until_ms", closedAt.toString()]])); + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["switched_at", switchedAt.toString()], + ["bucket_tokens", "0"], + ["bucket_refilled_at", "0"], + ]), + ); const { effectiveRate } = runAdmit(store, { - now: halfwayThrough, - refillPerSec: 10, - decayPeriodMs, + now, + targetRateLimit: 10, + recoveryPeriodMs, }); - expect(effectiveRate).toBe(5); + const probeRate = defaultArgs.probeRateLimit; + const expectedRate = probeRate + 0.5 * (10 - probeRate); + expect(effectiveRate).toBeCloseTo(expectedRate, 5); }); - it("uses full rate after decay period ends", () => { + it("uses full rate when closed and past recovery period", () => { const store = createRedisStore(); - const closedAt = 1_000_000; - const decayPeriodMs = 300_000; - const afterDecay = closedAt + decayPeriodMs + 1; + const switchedAt = 100_000; + const recoveryPeriodMs = 600_000; + const now = switchedAt + recoveryPeriodMs + 1; - store.set("cb:t1", new Map([["opened_until_ms", closedAt.toString()]])); + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["switched_at", switchedAt.toString()], + ["bucket_tokens", "0"], + ["bucket_refilled_at", "0"], + ]), + ); - const { allowed, effectiveRate } = runAdmit(store, { - now: afterDecay, - refillPerSec: 10, - decayPeriodMs, + const { effectiveRate } = runAdmit(store, { + now, + targetRateLimit: 10, + recoveryPeriodMs, }); - expect(allowed).toBe(1); expect(effectiveRate).toBe(10); }); + }); - it("clamps minimum effective rate to 1", () => { + describe("state persistence", () => { + it("persists bucket_tokens and bucket_refilled_at", () => { const store = createRedisStore(); - const closedAt = 1_000_000; - const decayPeriodMs = 300_000; - const veryEarly = closedAt + 1; + const now = 1_000_000; + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["bucket_tokens", "5"], + ["bucket_refilled_at", now.toString()], + ["switched_at", "0"], + ]), + ); - 
store.set("cb:t1", new Map([["opened_until_ms", closedAt.toString()]])); + runAdmit(store, { now, targetBatchSize: 2 }); - const { effectiveRate } = runAdmit(store, { - now: veryEarly, - refillPerSec: 10, - decayPeriodMs, + const epHash = store.get("ep:t1")!; + expect(Number(epHash.get("bucket_tokens"))).toBe(3); + }); + + it("does not write any fields when circuit_open early return", () => { + const store = createRedisStore(); + runAdmit(store, { + now: 10_000, }); - expect(effectiveRate).toBeGreaterThanOrEqual(1); + + expect(store.has("ep:t1")).toBe(false); }); - it("clears openedUntil when decay period fully elapses", () => { + it("does not write sampling or circuit fields on half-open path", () => { const store = createRedisStore(); - const closedAt = 1_000_000; - const decayPeriodMs = 300_000; - const afterDecay = closedAt + decayPeriodMs + 1; + runAdmit(store, { + now: 200_000, + }); - store.set("cb:t1", new Map([["opened_until_ms", closedAt.toString()]])); + const epHash = store.get("ep:t1")!; + expect(epHash.has("bucket_tokens")).toBe(true); + expect(epHash.has("bucket_refilled_at")).toBe(true); + expect(epHash.has("cur_attempts")).toBe(false); + expect(epHash.has("cur_failures")).toBe(false); + expect(epHash.has("sample_till")).toBe(false); + expect(epHash.has("is_open")).toBe(false); + expect(epHash.has("switched_at")).toBe(false); + }); + + it("isolates state between targets", () => { + const store = createRedisStore(); + store.set( + "ep:target-a", + new Map([ + ["is_open", "0"], + ["bucket_tokens", "5"], + ["bucket_refilled_at", "10000"], + ]), + ); + store.set( + "ep:target-b", + new Map([ + ["is_open", "0"], + ["bucket_tokens", "3"], + ["bucket_refilled_at", "10000"], + ]), + ); - runAdmit(store, { now: afterDecay, decayPeriodMs }); + runAdmit(store, { now: 10_000 }, "target-a"); + runAdmit(store, { now: 10_000 }, "target-b"); - const cbHash = store.get("cb:t1")!; - expect(cbHash.get("opened_until_ms")).toBe("0"); + 
expect(store.has("ep:target-a")).toBe(true); + expect(store.has("ep:target-b")).toBe(true); }); + }); - it("does not decay when decayPeriodMs is 0", () => { + describe("circuit breaker disabled (cbEnabled = false)", () => { + it("uses full targetRateLimit on a fresh endpoint with no prior state", () => { const store = createRedisStore(); - const closedAt = 1_000_000; + const now = 1_000_000; - store.set("cb:t1", new Map([["opened_until_ms", closedAt.toString()]])); + const { effectiveRate } = runAdmit(store, { + now, + targetRateLimit: 10, + cbEnabled: false, + }); - const { allowed, effectiveRate } = runAdmit(store, { - now: closedAt + 1, - refillPerSec: 10, - decayPeriodMs: 0, + expect(effectiveRate).toBe(10); + }); + + it("applies initial values on fresh endpoint so first call has no tokens", () => { + const store = createRedisStore(); + const now = 1_000_000; + + const { consumedTokens, effectiveRate, reason } = runAdmit(store, { + now, + targetRateLimit: 10, + cbEnabled: false, }); - expect(allowed).toBe(1); + expect(effectiveRate).toBe(10); + expect(consumedTokens).toBe(0); + expect(reason).toBe("rate_limited"); }); - }); - describe("state persistence", () => { - it("persists token count and last_refill_ms", () => { + it("generates tokens at full rate after initial contact", () => { const store = createRedisStore(); - runAdmit(store, { now: 1_000_000, capacity: 5 }); - const rlHash = store.get("rl:t1")!; - expect(rlHash.get("tokens")).toBeDefined(); - expect(rlHash.get("last_refill_ms")).toBe("1000000"); + runAdmit(store, { + now: 1_000_000, + targetRateLimit: 10, + cbEnabled: false, + }); + const { consumedTokens, reason } = runAdmit(store, { + now: 1_000_100, + targetRateLimit: 10, + cbEnabled: false, + }); + + expect(consumedTokens).toBe(1); + expect(reason).toBe("some_allowed"); }); - it("persists circuit breaker fields", () => { + it("ignores is_open state when CB is disabled", () => { const store = createRedisStore(); - runAdmit(store, { now: 1_000_000 
}); - - const cbHash = store.get("cb:t1")!; - expect(cbHash.has("opened_until_ms")).toBe(true); - expect(cbHash.has("cb_window_from")).toBe(true); - expect(cbHash.has("cb_failures")).toBe(true); - expect(cbHash.has("cb_attempts")).toBe(true); - expect(cbHash.has("cb_prev_failures")).toBe(true); - expect(cbHash.has("cb_prev_attempts")).toBe(true); + const now = 1_000_000; + store.set( + "ep:t1", + new Map([ + ["is_open", "1"], + ["switched_at", now.toString()], + ["bucket_tokens", "5"], + ["bucket_refilled_at", now.toString()], + ]), + ); + + const { consumedTokens, effectiveRate, reason } = runAdmit(store, { + now, + targetRateLimit: 10, + cbEnabled: false, + }); + + expect(effectiveRate).toBe(10); + expect(consumedTokens).toBe(1); + expect(reason).toBe("some_allowed"); }); - it("isolates state between targets", () => { + it("does not zero bucket tokens when is_open and CB disabled", () => { const store = createRedisStore(); - runAdmit(store, {}, "target-a"); - runAdmit(store, {}, "target-b"); + const now = 1_000_000; + store.set( + "ep:t1", + new Map([ + ["is_open", "1"], + ["switched_at", now.toString()], + ["bucket_tokens", "5"], + ["bucket_refilled_at", now.toString()], + ]), + ); + + const { consumedTokens } = runAdmit(store, { + now, + targetRateLimit: 10, + cbEnabled: false, + targetBatchSize: 3, + }); + + expect(consumedTokens).toBe(3); + }); + + it("never returns circuit_open when CB is disabled", () => { + const store = createRedisStore(); + const now = 1_000_000; + store.set( + "ep:t1", + new Map([ + ["is_open", "1"], + ["switched_at", (now - 10_000).toString()], + ["bucket_tokens", "0"], + ["bucket_refilled_at", now.toString()], + ]), + ); + + const { reason } = runAdmit(store, { + now, + cooldownMs: 120_000, + cbEnabled: false, + }); - expect(store.has("cb:target-a")).toBe(true); - expect(store.has("cb:target-b")).toBe(true); - expect(store.has("rl:target-a")).toBe(true); - expect(store.has("rl:target-b")).toBe(true); + 
expect(reason).not.toBe("circuit_open"); }); }); }); diff --git a/lambdas/https-client-lambda/src/__tests__/delivery-metrics.test.ts b/lambdas/https-client-lambda/src/__tests__/delivery-metrics.test.ts index 803c19bb..249b0148 100644 --- a/lambdas/https-client-lambda/src/__tests__/delivery-metrics.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/delivery-metrics.test.ts @@ -1,6 +1,7 @@ const mockCreateMetricsLogger = jest.fn(); jest.mock("aws-embedded-metrics", () => ({ Unit: { Count: "Count", Milliseconds: "Milliseconds" }, + StorageResolution: { High: 1, Standard: 60 }, createMetricsLogger: () => mockCreateMetricsLogger(), })); @@ -83,6 +84,7 @@ describe("delivery-metrics", () => { "DeliveryAttempt", 1, "Count", + 1, ); }); @@ -96,6 +98,7 @@ describe("delivery-metrics", () => { "DeliverySuccess", 1, "Count", + 1, ); }); @@ -109,6 +112,7 @@ describe("delivery-metrics", () => { "DeliveryFailure", 1, "Count", + 1, ); }); @@ -123,6 +127,7 @@ describe("delivery-metrics", () => { "DeliveryPermanentFailure", 1, "Count", + 1, ); }); @@ -137,6 +142,7 @@ describe("delivery-metrics", () => { "CircuitBreakerOpen", 1, "Count", + 1, ); }); @@ -151,6 +157,7 @@ describe("delivery-metrics", () => { "DeliveryRateLimited", 1, "Count", + 1, ); }); @@ -165,6 +172,7 @@ describe("delivery-metrics", () => { "CircuitBreakerClosed", 1, "Count", + 1, ); }); @@ -179,6 +187,7 @@ describe("delivery-metrics", () => { "DeliveryRetryWindowExhausted", 1, "Count", + 1, ); }); @@ -201,6 +210,7 @@ describe("delivery-metrics", () => { "AdmissionDenied", 1, "Count", + 1, ); }); @@ -215,6 +225,7 @@ describe("delivery-metrics", () => { "DeliveryDurationMs", 250, "Milliseconds", + 1, ); }); diff --git a/lambdas/https-client-lambda/src/__tests__/delivery-observability.test.ts b/lambdas/https-client-lambda/src/__tests__/delivery-observability.test.ts index 25e164a9..2c0c0543 100644 --- a/lambdas/https-client-lambda/src/__tests__/delivery-observability.test.ts +++ 
b/lambdas/https-client-lambda/src/__tests__/delivery-observability.test.ts @@ -39,7 +39,7 @@ describe("delivery-observability", () => { ); const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); - recordDeliveryAttempt("client-1", "target-1", "msg-123"); + recordDeliveryAttempt("client-1", "target-1", "msg-123", "sqs-msg-1", 3); expect(emitDeliveryAttempt).toHaveBeenCalledWith("target-1"); expect(logger.info).toHaveBeenCalledWith( @@ -48,6 +48,8 @@ describe("delivery-observability", () => { clientId: "client-1", targetId: "target-1", correlationId: "msg-123", + sqsMessageId: "sqs-msg-1", + receiveCount: 3, }), ); }); @@ -190,25 +192,30 @@ describe("delivery-observability", () => { ); }); - it("recordAdmissionDenied emits metric and logs", () => { + it("recordAdmissionDenied emits per-record metrics and logs", () => { const { emitAdmissionDenied } = jest.requireMock( "services/delivery-metrics", ); const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); - recordAdmissionDenied("client-1", "target-1", "rate_limited", "msg-123"); + recordAdmissionDenied("client-1", "target-1", "rate_limited", [ + "msg-a", + "msg-b", + ]); expect(emitAdmissionDenied).toHaveBeenCalledWith( "target-1", "rate_limited", + 2, ); expect(logger.warn).toHaveBeenCalledWith( "Admission denied", expect.objectContaining({ clientId: "client-1", targetId: "target-1", - correlationId: "msg-123", reason: "rate_limited", + deniedCount: 2, + correlationIds: ["msg-a", "msg-b"], }), ); }); diff --git a/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts b/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts index efbc6d88..57573a3d 100644 --- a/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts @@ -11,13 +11,13 @@ const mockDisconnect = jest.fn().mockResolvedValue(undefined); const mockOn = jest.fn(); const defaultConfig: EndpointGateConfig = { - burstCapacity: 
10, - cbProbeIntervalMs: 60_000, - decayPeriodMs: 300_000, - cbWindowPeriodMs: 60_000, - cbErrorThreshold: 0.5, - cbMinAttempts: 10, - cbCooldownMs: 60_000, + burstCapacity: 2250, + probeRateLimit: 1 / 60, + recoveryPeriodMs: 600_000, + samplePeriodMs: 300_000, + failureThreshold: 0.3, + minAttempts: 5, + cooldownPeriodMs: 120_000, }; const mockRedis = { @@ -34,12 +34,23 @@ beforeEach(() => { }); describe("admit", () => { - it("returns allowed when tokens available", async () => { - mockSendCommand.mockResolvedValueOnce([1, "allowed", 0, 10]); + it("returns allowed with consumedTokens when tokens available", async () => { + mockSendCommand.mockResolvedValueOnce([5, "some_allowed", 0, 10]); - const result = await admit(mockRedis, "target-1", 10, true, defaultConfig); + const result = await admit( + mockRedis, + "target-1", + 10, + true, + 5, + defaultConfig, + ); - expect(result).toEqual({ allowed: true, probe: false, effectiveRate: 10 }); + expect(result).toEqual({ + allowed: true, + consumedTokens: 5, + effectiveRate: 10, + }); expect(mockSendCommand).toHaveBeenCalledWith( expect.arrayContaining(["EVALSHA"]), ); @@ -48,7 +59,14 @@ describe("admit", () => { it("returns rate_limited when tokens exhausted", async () => { mockSendCommand.mockResolvedValueOnce([0, "rate_limited", 1000, 10]); - const result = await admit(mockRedis, "target-1", 10, false, defaultConfig); + const result = await admit( + mockRedis, + "target-1", + 10, + false, + 5, + defaultConfig, + ); expect(result).toEqual({ allowed: false, @@ -58,18 +76,17 @@ describe("admit", () => { }); }); - it("returns allowed with probe flag when circuit is open but probe slot is available", async () => { - mockSendCommand.mockResolvedValueOnce([1, "probe", 0, 0]); - - const result = await admit(mockRedis, "target-1", 10, true, defaultConfig); - - expect(result).toEqual({ allowed: true, probe: true, effectiveRate: 0 }); - }); - - it("returns circuit_open without probe slot", async () => { + it("returns 
circuit_open when circuit is fully open", async () => { mockSendCommand.mockResolvedValueOnce([0, "circuit_open", 30_000, 0]); - const result = await admit(mockRedis, "target-1", 10, true, defaultConfig); + const result = await admit( + mockRedis, + "target-1", + 10, + true, + 5, + defaultConfig, + ); expect(result).toEqual({ allowed: false, @@ -82,11 +99,22 @@ describe("admit", () => { it("falls back to EVAL on NOSCRIPT error", async () => { mockSendCommand .mockRejectedValueOnce(new Error("NOSCRIPT No matching script")) - .mockResolvedValueOnce([1, "allowed", 0, 10]); + .mockResolvedValueOnce([1, "some_allowed", 0, 10]); - const result = await admit(mockRedis, "target-1", 10, true, defaultConfig); + const result = await admit( + mockRedis, + "target-1", + 10, + true, + 1, + defaultConfig, + ); - expect(result).toEqual({ allowed: true, probe: false, effectiveRate: 10 }); + expect(result).toEqual({ + allowed: true, + consumedTokens: 1, + effectiveRate: 10, + }); expect(mockSendCommand).toHaveBeenCalledTimes(2); expect(mockSendCommand).toHaveBeenNthCalledWith( 1, @@ -98,25 +126,33 @@ describe("admit", () => { ); }); - it("passes cbProbeIntervalMs=0 when circuit breaker is disabled", async () => { - mockSendCommand.mockResolvedValueOnce([1, "allowed", 0, 10]); + it("passes cbEnabled=0 when circuit breaker is disabled", async () => { + mockSendCommand.mockResolvedValueOnce([1, "some_allowed", 0, 10]); + + await admit(mockRedis, "target-1", 10, false, 1, defaultConfig); + + const args = mockSendCommand.mock.calls[0]![0] as string[]; + const cbEnabledArg = args[11]; + expect(cbEnabledArg).toBe("0"); + }); + + it("passes single epKey", async () => { + mockSendCommand.mockResolvedValueOnce([1, "some_allowed", 0, 5]); - await admit(mockRedis, "target-1", 10, false, defaultConfig); + await admit(mockRedis, "my-target", 5, true, 1, defaultConfig); - // EVALSHA layout: [EVALSHA, sha, keyCount, cbKey, rlKey, now, capacity, refillPerSec, cooldownMs, decayPeriodMs, 
cbWindowPeriodMs, cbProbeIntervalMs] const args = mockSendCommand.mock.calls[0]![0] as string[]; - const cbProbeIntervalArg = args[11]; - expect(cbProbeIntervalArg).toBe("0"); + expect(args[3]).toBe("ep:{my-target}"); }); - it("passes cbKey first, rlKey second", async () => { - mockSendCommand.mockResolvedValueOnce([1, "allowed", 0, 5]); + it("passes targetBatchSize as ARGV", async () => { + mockSendCommand.mockResolvedValueOnce([3, "some_allowed", 0, 10]); - await admit(mockRedis, "my-target", 5, true, defaultConfig); + await admit(mockRedis, "target-1", 10, true, 7, defaultConfig); const args = mockSendCommand.mock.calls[0]![0] as string[]; - expect(args[3]).toBe("cb:{my-target}"); - expect(args[4]).toBe("rl:{my-target}"); + const batchSizeArg = args[10]; + expect(batchSizeArg).toBe("7"); }); }); @@ -130,6 +166,7 @@ describe("evalScript", () => { "target-1", 10, true, + 1, defaultConfig, ).catch((error: unknown) => error); @@ -149,6 +186,7 @@ describe("evalScript", () => { "target-1", 10, true, + 1, defaultConfig, ).catch((error: unknown) => error); @@ -159,70 +197,101 @@ describe("evalScript", () => { }); describe("recordResult", () => { - it("returns closed on success below threshold", async () => { - mockSendCommand.mockResolvedValueOnce([1, "closed"]); + it("returns closed state when circuit is steady-state", async () => { + mockSendCommand.mockResolvedValueOnce(["closed", 0]); const result = await recordResult( mockRedis, "target-1", - true, + 5, + 0, defaultConfig, ); - expect(result).toEqual({ ok: true, state: "closed" }); + expect(result).toEqual({ circuitState: "closed", stateChanged: false }); expect(mockSendCommand).toHaveBeenCalledWith( expect.arrayContaining(["EVALSHA"]), ); }); - it("returns opened when failure crosses threshold", async () => { - mockSendCommand.mockResolvedValueOnce([0, "opened"]); + it("returns open with stateChanged when failure crosses threshold", async () => { + mockSendCommand.mockResolvedValueOnce(["open", 1]); const result = 
await recordResult( mockRedis, "target-1", - false, + 5, + 5, defaultConfig, ); - expect(result).toEqual({ ok: false, state: "opened" }); + expect(result).toEqual({ circuitState: "open", stateChanged: true }); }); - it("returns failed when failure is below threshold", async () => { - mockSendCommand.mockResolvedValueOnce([0, "failed"]); + it("returns closed_recovery with stateChanged when circuit closes", async () => { + mockSendCommand.mockResolvedValueOnce(["closed_recovery", 1]); const result = await recordResult( mockRedis, "target-1", - false, + 5, + 0, + defaultConfig, + ); + + expect(result).toEqual({ + circuitState: "closed_recovery", + stateChanged: true, + }); + }); + + it("returns half_open without stateChanged when probing", async () => { + mockSendCommand.mockResolvedValueOnce(["half_open", 0]); + + const result = await recordResult( + mockRedis, + "target-1", + 5, + 1, defaultConfig, ); - expect(result).toEqual({ ok: false, state: "failed" }); + expect(result).toEqual({ circuitState: "half_open", stateChanged: false }); }); it("falls back to EVAL on NOSCRIPT error", async () => { mockSendCommand .mockRejectedValueOnce(new Error("NOSCRIPT No matching script")) - .mockResolvedValueOnce([1, "closed"]); + .mockResolvedValueOnce(["closed", 0]); const result = await recordResult( mockRedis, "target-1", - true, + 1, + 0, defaultConfig, ); - expect(result).toEqual({ ok: true, state: "closed" }); + expect(result).toEqual({ circuitState: "closed", stateChanged: false }); expect(mockSendCommand).toHaveBeenCalledTimes(2); }); - it("passes correct cb key for target", async () => { - mockSendCommand.mockResolvedValueOnce([1, "closed"]); + it("passes correct ep key for target", async () => { + mockSendCommand.mockResolvedValueOnce(["closed", 0]); + + await recordResult(mockRedis, "my-target", 1, 0, defaultConfig); + + const args = mockSendCommand.mock.calls[0]![0] as string[]; + expect(args[3]).toBe("ep:{my-target}"); + }); + + it("passes consumedTokens and 
processingFailures as ARGV", async () => { + mockSendCommand.mockResolvedValueOnce(["closed", 0]); - await recordResult(mockRedis, "my-target", true, defaultConfig); + await recordResult(mockRedis, "target-1", 8, 3, defaultConfig); const args = mockSendCommand.mock.calls[0]![0] as string[]; - expect(args[3]).toBe("cb:{my-target}"); + expect(args[5]).toBe("8"); + expect(args[6]).toBe("3"); }); }); diff --git a/lambdas/https-client-lambda/src/__tests__/handler.test.ts b/lambdas/https-client-lambda/src/__tests__/handler.test.ts index 3b8ad521..7121cbeb 100644 --- a/lambdas/https-client-lambda/src/__tests__/handler.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/handler.test.ts @@ -3,7 +3,6 @@ import { DEFAULT_TARGET, makeRecord, } from "__tests__/fixtures/handler-fixtures"; -import { VisibilityManagedError } from "services/visibility-managed-error"; jest.mock("@nhs-notify-client-callbacks/logger", () => ({ logger: { @@ -74,17 +73,20 @@ jest.mock("services/redis-client", () => ({ getRedisClient: (...args: unknown[]) => mockGetRedisClient(...args), })); +jest.mock("services/delivery-observability", () => ({ + recordAdmissionDenied: jest.fn(), + recordCircuitBreakerClosed: jest.fn(), + recordCircuitBreakerOpen: jest.fn(), + recordDeliveryAttempt: jest.fn(), + recordDeliveryDuration: jest.fn(), + recordDeliveryFailure: jest.fn(), + recordDeliveryPermanentFailure: jest.fn(), + recordDeliveryRateLimited: jest.fn(), + recordDeliverySuccess: jest.fn(), + recordRetryWindowExhausted: jest.fn(), +})); + jest.mock("services/delivery-metrics", () => ({ - emitAdmissionDenied: jest.fn(), - emitCircuitBreakerClosed: jest.fn(), - emitCircuitBreakerOpen: jest.fn(), - emitDeliveryAttempt: jest.fn(), - emitDeliveryDuration: jest.fn(), - emitDeliveryFailure: jest.fn(), - emitDeliveryPermanentFailure: jest.fn(), - emitDeliverySuccess: jest.fn(), - emitRateLimited: jest.fn(), - emitRetryWindowExhausted: jest.fn(), flushMetrics: jest.fn().mockResolvedValue(undefined), resetMetrics: 
jest.fn(), })); @@ -106,15 +108,15 @@ describe("processRecords", () => { mockJitteredBackoff.mockReturnValue(5); mockIsWindowExhausted.mockReturnValue(false); mockHandleRateLimitedRecord.mockRejectedValue( - new VisibilityManagedError("Rate limited — requeue"), + new Error("Rate limited — requeue"), ); mockGetRedisClient.mockResolvedValue({}); mockAdmit.mockResolvedValue({ allowed: true, - probe: false, + consumedTokens: 100, effectiveRate: 10, }); - mockRecordResult.mockResolvedValue({ ok: true, state: "closed" }); + mockRecordResult.mockResolvedValue({ ok: true, state: "ok" }); }); it("returns no failures on successful delivery", async () => { @@ -159,7 +161,7 @@ describe("processRecords", () => { expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); }); - it("returns failure for 429 rate-limited responses", async () => { + it("returns failure for 429 when handleRateLimitedRecord rejects", async () => { mockDeliverPayload.mockResolvedValue({ outcome: "rate_limited", retryAfterHeader: "60", @@ -177,7 +179,7 @@ describe("processRecords", () => { ); }); - it("processes multiple records independently", async () => { + it("processes multiple records in a single target batch", async () => { const record1 = makeRecord({ messageId: "msg-1" }); const record2 = makeRecord({ messageId: "msg-2" }); @@ -191,25 +193,67 @@ describe("processRecords", () => { const failures = await processRecords([record1, record2]); expect(failures).toEqual([{ itemIdentifier: "msg-2" }]); + expect(mockAdmit).toHaveBeenCalledTimes(1); + }); + + it("delivers only admitted records when consumedTokens is less than batch size", async () => { + const record1 = makeRecord({ + messageId: "msg-1", + receiptHandle: "receipt-1", + }); + const record2 = makeRecord({ + messageId: "msg-2", + receiptHandle: "receipt-2", + }); + const record3 = makeRecord({ + messageId: "msg-3", + receiptHandle: "receipt-3", + }); + + mockAdmit.mockResolvedValue({ + allowed: true, + consumedTokens: 1, + effectiveRate: 10, + 
}); + + const { recordAdmissionDenied } = jest.requireMock( + "services/delivery-observability", + ); + + const failures = await processRecords([record1, record2, record3]); + + expect(mockDeliverPayload).toHaveBeenCalledTimes(1); + expect(failures).toEqual([ + { itemIdentifier: "msg-2" }, + { itemIdentifier: "msg-3" }, + ]); + expect(recordAdmissionDenied).toHaveBeenCalledWith( + "client-1", + "target-1", + "rate_limited", + ["test-message-id", "test-message-id"], + ); + + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-2", 1); + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-3", 1); }); - it("an unexpected error on one record does not prevent subsequent records being processed", async () => { + it("an unexpected delivery error does not prevent other records in the batch", async () => { const record1 = makeRecord({ messageId: "msg-1" }); const record2 = makeRecord({ messageId: "msg-2" }); - mockLoadTargetConfig - .mockRejectedValueOnce(new Error("S3 unavailable")) - .mockResolvedValueOnce(DEFAULT_TARGET); + mockDeliverPayload + .mockRejectedValueOnce(new Error("Connection reset")) + .mockResolvedValueOnce({ outcome: "success" }); const failures = await processRecords([record1, record2]); expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); - expect(mockDeliverPayload).toHaveBeenCalledTimes(1); expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 5); }); it("applies jittered backoff cooldown on unexpected errors", async () => { - mockLoadTargetConfig.mockRejectedValue(new Error("Infrastructure error")); + mockDeliverPayload.mockRejectedValue(new Error("Infrastructure error")); const failures = await processRecords([makeRecord()]); @@ -217,7 +261,7 @@ describe("processRecords", () => { expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 5); }); - it("does not apply a second visibility change for admission-denied (managed path)", async () => { + it("changes visibility once per record for admission-denied batch", async 
() => { mockAdmit.mockResolvedValue({ allowed: false, reason: "rate_limited", @@ -230,7 +274,7 @@ describe("processRecords", () => { expect(mockChangeVisibility).toHaveBeenCalledTimes(1); }); - it("does not apply a second visibility change for transient failure (managed path)", async () => { + it("changes visibility once for transient failure", async () => { mockDeliverPayload.mockResolvedValue({ outcome: "transient_failure", statusCode: 503, @@ -241,13 +285,13 @@ describe("processRecords", () => { expect(mockChangeVisibility).toHaveBeenCalledTimes(1); }); - it("returns failure when CLIENT_ID is not set", async () => { + it("throws when CLIENT_ID is not set", async () => { const saved = process.env.CLIENT_ID; delete process.env.CLIENT_ID; - const failures = await processRecords([makeRecord()]); - - expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); + await expect(processRecords([makeRecord()])).rejects.toThrow( + "CLIENT_ID is required", + ); process.env.CLIENT_ID = saved; }); @@ -262,7 +306,7 @@ describe("processRecords", () => { expect(mockDeliverPayload).not.toHaveBeenCalled(); }); - it("calls changeVisibility with backoff on 5xx then throws", async () => { + it("calls changeVisibility with backoff on 5xx", async () => { mockDeliverPayload.mockResolvedValue({ outcome: "transient_failure", statusCode: 503, @@ -303,7 +347,7 @@ describe("processRecords", () => { expect(failures).toEqual([]); }); - it("requeues when rate limited by endpoint gate", async () => { + it("requeues all records when rate limited by endpoint gate", async () => { mockAdmit.mockResolvedValue({ allowed: false, reason: "rate_limited", @@ -314,12 +358,14 @@ describe("processRecords", () => { const failures = await processRecords([makeRecord()]); expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); - expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 2); + const visibilityDelay = mockChangeVisibility.mock.calls[0]![1] as number; + 
expect(visibilityDelay).toBeGreaterThanOrEqual(2); + expect(visibilityDelay).toBeLessThanOrEqual(6); expect(mockSendToDlq).not.toHaveBeenCalled(); expect(mockDeliverPayload).not.toHaveBeenCalled(); }); - it("requeues when circuit is open", async () => { + it("requeues all records when circuit is open", async () => { mockAdmit.mockResolvedValue({ allowed: false, reason: "circuit_open", @@ -330,7 +376,9 @@ describe("processRecords", () => { const failures = await processRecords([makeRecord()]); expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); - expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 30); + const visibilityDelay = mockChangeVisibility.mock.calls[0]![1] as number; + expect(visibilityDelay).toBeGreaterThanOrEqual(30); + expect(visibilityDelay).toBeLessThanOrEqual(34); expect(mockSendToDlq).not.toHaveBeenCalled(); expect(mockDeliverPayload).not.toHaveBeenCalled(); }); @@ -350,17 +398,23 @@ describe("processRecords", () => { "target-1", 10, false, + 1, expect.any(Object), ); expect(mockDeliverPayload).toHaveBeenCalled(); }); - it("calls recordResult(true) on successful delivery when CB enabled", async () => { + it("calls recordResult with batch counts on successful delivery when CB enabled", async () => { const targetCb = { ...DEFAULT_TARGET, delivery: { circuitBreaker: { enabled: true } }, }; mockLoadTargetConfig.mockResolvedValue(targetCb); + mockAdmit.mockResolvedValue({ + allowed: true, + consumedTokens: 1, + effectiveRate: 10, + }); const failures = await processRecords([makeRecord()]); @@ -368,17 +422,23 @@ describe("processRecords", () => { expect(mockRecordResult).toHaveBeenCalledWith( expect.anything(), "target-1", - true, + 1, + 0, expect.any(Object), ); }); - it("calls recordResult(false) on 5xx before visibility change", async () => { + it("calls recordResult with failure count on 5xx when CB enabled", async () => { const targetCb = { ...DEFAULT_TARGET, delivery: { circuitBreaker: { enabled: true } }, }; 
mockLoadTargetConfig.mockResolvedValue(targetCb); + mockAdmit.mockResolvedValue({ + allowed: true, + consumedTokens: 1, + effectiveRate: 10, + }); mockDeliverPayload.mockResolvedValue({ outcome: "transient_failure", statusCode: 503, @@ -390,13 +450,14 @@ describe("processRecords", () => { expect(mockRecordResult).toHaveBeenCalledWith( expect.anything(), "target-1", - false, + 1, + 1, expect.any(Object), ); expect(mockChangeVisibility).toHaveBeenCalled(); }); - it("does not call recordResult on rate-limited path", async () => { + it("does not call recordResult on gate admission-denied path", async () => { mockAdmit.mockResolvedValue({ allowed: false, reason: "rate_limited", @@ -409,17 +470,6 @@ describe("processRecords", () => { expect(mockRecordResult).not.toHaveBeenCalled(); }); - it("does not call recordResult on 429 path", async () => { - mockDeliverPayload.mockResolvedValue({ - outcome: "rate_limited", - retryAfterHeader: "60", - }); - - await processRecords([makeRecord()]); - - expect(mockRecordResult).not.toHaveBeenCalled(); - }); - it("does not call recordResult when CB is disabled on transient failure", async () => { const targetNoCb = { ...DEFAULT_TARGET, @@ -449,7 +499,7 @@ describe("processRecords", () => { expect(mockRecordResult).not.toHaveBeenCalled(); }); - it("emits CircuitBreakerOpen metric when recordResult returns opened", async () => { + it("records CircuitBreakerOpen when recordResult indicates circuit opened", async () => { const targetCb = { ...DEFAULT_TARGET, delivery: { circuitBreaker: { enabled: true } }, @@ -459,18 +509,21 @@ describe("processRecords", () => { outcome: "transient_failure", statusCode: 503, }); - mockRecordResult.mockResolvedValue({ ok: false, state: "opened" }); + mockRecordResult.mockResolvedValue({ + circuitState: "open", + stateChanged: true, + }); - const { emitCircuitBreakerOpen } = jest.requireMock( - "services/delivery-metrics", + const { recordCircuitBreakerOpen } = jest.requireMock( + 
"services/delivery-observability", ); await processRecords([makeRecord()]); - expect(emitCircuitBreakerOpen).toHaveBeenCalledWith("target-1"); + expect(recordCircuitBreakerOpen).toHaveBeenCalledWith("target-1"); }); - it("does not emit CircuitBreakerOpen when recordResult returns failed", async () => { + it("does not record CircuitBreakerOpen when recordResult has no state change", async () => { const targetCb = { ...DEFAULT_TARGET, delivery: { circuitBreaker: { enabled: true } }, @@ -480,18 +533,21 @@ describe("processRecords", () => { outcome: "transient_failure", statusCode: 503, }); - mockRecordResult.mockResolvedValue({ ok: false, state: "failed" }); + mockRecordResult.mockResolvedValue({ + circuitState: "open", + stateChanged: false, + }); - const { emitCircuitBreakerOpen } = jest.requireMock( - "services/delivery-metrics", + const { recordCircuitBreakerOpen } = jest.requireMock( + "services/delivery-observability", ); await processRecords([makeRecord()]); - expect(emitCircuitBreakerOpen).not.toHaveBeenCalled(); + expect(recordCircuitBreakerOpen).not.toHaveBeenCalled(); }); - it("does not emit CircuitBreakerOpen when recordResult returns closed", async () => { + it("does not record CircuitBreakerOpen when circuit is closed", async () => { const targetCb = { ...DEFAULT_TARGET, delivery: { circuitBreaker: { enabled: true } }, @@ -501,28 +557,61 @@ describe("processRecords", () => { outcome: "transient_failure", statusCode: 503, }); - mockRecordResult.mockResolvedValue({ ok: true, state: "closed" }); + mockRecordResult.mockResolvedValue({ + circuitState: "closed", + stateChanged: false, + }); - const { emitCircuitBreakerOpen } = jest.requireMock( - "services/delivery-metrics", + const { recordCircuitBreakerOpen } = jest.requireMock( + "services/delivery-observability", ); await processRecords([makeRecord()]); - expect(emitCircuitBreakerOpen).not.toHaveBeenCalled(); + expect(recordCircuitBreakerOpen).not.toHaveBeenCalled(); }); - it("emits RateLimited metric on 
429 response", async () => { + it("records CircuitBreakerClosed when recordResult indicates circuit closed", async () => { + const targetCb = { + ...DEFAULT_TARGET, + delivery: { circuitBreaker: { enabled: true } }, + }; + mockLoadTargetConfig.mockResolvedValue(targetCb); + mockDeliverPayload.mockResolvedValue({ + outcome: "success", + statusCode: 200, + }); + mockRecordResult.mockResolvedValue({ + circuitState: "closed_recovery", + stateChanged: true, + }); + + const { recordCircuitBreakerClosed } = jest.requireMock( + "services/delivery-observability", + ); + + await processRecords([makeRecord()]); + + expect(recordCircuitBreakerClosed).toHaveBeenCalledWith("target-1"); + }); + + it("records RateLimited on 429 response", async () => { mockDeliverPayload.mockResolvedValue({ outcome: "rate_limited", retryAfterHeader: "60", }); - const { emitRateLimited } = jest.requireMock("services/delivery-metrics"); + const { recordDeliveryRateLimited } = jest.requireMock( + "services/delivery-observability", + ); await processRecords([makeRecord()]); - expect(emitRateLimited).toHaveBeenCalledWith("target-1"); + expect(recordDeliveryRateLimited).toHaveBeenCalledWith( + "client-1", + "target-1", + "test-message-id", + ); }); it("uses configured maxRetryDurationSeconds when set on target", async () => { @@ -558,4 +647,94 @@ describe("processRecords", () => { 7_200_000, ); }); + + it("groups records by target and processes each batch separately", async () => { + const record1 = makeRecord({ messageId: "msg-1" }); + const record2 = makeRecord({ + messageId: "msg-2", + body: JSON.stringify({ + payload: { + data: [ + { + type: "MessageStatus", + attributes: { messageStatus: "delivered" }, + }, + ], + }, + subscriptionId: "sub-2", + targetId: "target-2", + }), + }); + + const failures = await processRecords([record1, record2]); + + expect(failures).toEqual([]); + expect(mockAdmit).toHaveBeenCalledTimes(2); + expect(mockLoadTargetConfig).toHaveBeenCalledWith("client-1", "target-1"); + 
expect(mockLoadTargetConfig).toHaveBeenCalledWith("client-1", "target-2"); + }); + + it("calls recordAdmissionDenied with correlationIds when batch denied", async () => { + const record1 = makeRecord({ messageId: "msg-1" }); + const record2 = makeRecord({ messageId: "msg-2" }); + + mockAdmit.mockResolvedValue({ + allowed: false, + reason: "circuit_open", + retryAfterMs: 30_000, + effectiveRate: 0, + }); + + const { recordAdmissionDenied } = jest.requireMock( + "services/delivery-observability", + ); + + await processRecords([record1, record2]); + + expect(recordAdmissionDenied).toHaveBeenCalledWith( + "client-1", + "target-1", + "circuit_open", + ["test-message-id", "test-message-id"], + ); + }); + + it("logs deliveredCount and dlqCount in batch complete", async () => { + const record1 = makeRecord({ messageId: "msg-1" }); + const record2 = makeRecord({ messageId: "msg-2" }); + + mockDeliverPayload + .mockResolvedValueOnce({ outcome: "success" }) + .mockResolvedValueOnce({ outcome: "permanent_failure" }); + + const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); + + await processRecords([record1, record2]); + + expect(logger.info).toHaveBeenCalledWith( + "Batch complete", + expect.objectContaining({ + batchSize: 2, + deliveredCount: 1, + dlqCount: 1, + failureCount: 0, + }), + ); + }); + + it("includes correlationId in error log on unexpected delivery failure", async () => { + mockDeliverPayload.mockRejectedValue(new Error("Connection reset")); + + const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); + + await processRecords([makeRecord()]); + + expect(logger.error).toHaveBeenCalledWith( + "Failed to process record", + expect.objectContaining({ + messageId: "msg-1", + correlationId: "test-message-id", + }), + ); + }); }); diff --git a/lambdas/https-client-lambda/src/__tests__/record-result-lua.test.ts b/lambdas/https-client-lambda/src/__tests__/record-result-lua.test.ts index 515f1377..00e04707 100644 --- 
a/lambdas/https-client-lambda/src/__tests__/record-result-lua.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/record-result-lua.test.ts @@ -1,31 +1,35 @@ import recordResultLuaSrc from "services/record-result.lua"; import { createRedisStore, evalLua } from "__tests__/helpers/lua-redis-mock"; -// ARGV: [now, success, cooldownMs, decayPeriodMs, cbErrorThreshold, cbMinAttempts, cbWindowPeriodMs] -// KEYS: [cbKey] -// Returns: [ok (0|1), state] state: "closed" | "opened" | "failed" +// ARGV: [now, consumedTokens, processingFailures, cooldownPeriodMs, recoveryPeriodMs, failureThreshold, minAttempts, samplePeriodMs] +// KEYS: [epKey] +// Returns: [circuitState, stateChanged] +// circuitState: "open" | "half_open" | "closed_recovery" | "closed" +// stateChanged: 0 | 1 type RecordResultArgs = { now: number; - success: boolean; - cooldownMs: number; - decayPeriodMs: number; - cbErrorThreshold: number; - cbMinAttempts: number; - cbWindowPeriodMs: number; + consumedTokens: number; + processingFailures: number; + cooldownPeriodMs: number; + recoveryPeriodMs: number; + failureThreshold: number; + minAttempts: number; + samplePeriodMs: number; }; const defaultArgs: RecordResultArgs = { now: 1_000_000, - success: true, - cooldownMs: 60_000, - decayPeriodMs: 300_000, - cbErrorThreshold: 0.5, - cbMinAttempts: 10, - cbWindowPeriodMs: 60_000, + consumedTokens: 1, + processingFailures: 0, + cooldownPeriodMs: 120_000, + recoveryPeriodMs: 600_000, + failureThreshold: 0.3, + minAttempts: 5, + samplePeriodMs: 300_000, }; -type RecordResultResult = [number, string]; +type RecordResultResult = [string, number]; function runRecordResult( store: ReturnType, @@ -35,15 +39,16 @@ function runRecordResult( const merged = { ...defaultArgs, ...args }; return evalLua( recordResultLuaSrc, - [`cb:${targetId}`], + [`ep:${targetId}`], [ merged.now.toString(), - merged.success ? 
"1" : "0", - merged.cooldownMs.toString(), - merged.decayPeriodMs.toString(), - merged.cbErrorThreshold.toString(), - merged.cbMinAttempts.toString(), - merged.cbWindowPeriodMs.toString(), + merged.consumedTokens.toString(), + merged.processingFailures.toString(), + merged.cooldownPeriodMs.toString(), + merged.recoveryPeriodMs.toString(), + merged.failureThreshold.toString(), + merged.minAttempts.toString(), + merged.samplePeriodMs.toString(), ], store, ) as RecordResultResult; @@ -51,323 +56,405 @@ function runRecordResult( describe("record-result.lua", () => { describe("success recording", () => { - it("returns closed state for a successful result", () => { + it("returns closed state for a successful batch with no state change", () => { const store = createRedisStore(); - const [ok, state] = runRecordResult(store, { success: true }); + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["switched_at", "0"], + ["sample_till", "9999999999"], + ]), + ); + + const [circuitState, stateChanged] = runRecordResult(store, { + consumedTokens: 5, + processingFailures: 0, + }); - expect(ok).toBe(1); - expect(state).toBe("closed"); + expect(circuitState).toBe("closed"); + expect(stateChanged).toBe(0); }); - it("increments attempt count without incrementing failures", () => { + it("increments cur_attempts without incrementing cur_failures", () => { const store = createRedisStore(); - runRecordResult(store, { success: true }); + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["switched_at", "0"], + ["sample_till", "9999999999"], + ]), + ); + + runRecordResult(store, { consumedTokens: 3, processingFailures: 0 }); - const cbHash = store.get("cb:t1")!; - expect(cbHash.get("cb_attempts")).toBe("1"); - expect(cbHash.get("cb_failures")).toBe("0"); + const epHash = store.get("ep:t1")!; + expect(epHash.get("cur_attempts")).toBe("3"); + expect(epHash.get("cur_failures")).toBe("0"); }); }); describe("failure recording", () => { - it("increments both attempts and failures on 
error", () => { + it("increments both cur_attempts and cur_failures", () => { const store = createRedisStore(); - runRecordResult(store, { success: false }); - - const cbHash = store.get("cb:t1")!; - expect(cbHash.get("cb_attempts")).toBe("1"); - expect(cbHash.get("cb_failures")).toBe("1"); - }); + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["switched_at", "0"], + ["sample_till", "9999999999"], + ]), + ); - it("returns failed state for a single failure below threshold", () => { - const store = createRedisStore(); - const [ok, state] = runRecordResult(store, { success: false }); + runRecordResult(store, { consumedTokens: 5, processingFailures: 1 }); - expect(ok).toBe(0); - expect(state).toBe("failed"); + const epHash = store.get("ep:t1")!; + expect(epHash.get("cur_attempts")).toBe("5"); + expect(epHash.get("cur_failures")).toBe("1"); }); - it("stays closed when below error threshold", () => { + it("returns closed state for failures below threshold", () => { const store = createRedisStore(); - const now = 1_000_000; + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["switched_at", "0"], + ["sample_till", "9999999999"], + ]), + ); - for (let i = 0; i < 8; i++) { - runRecordResult(store, { now, success: true }); - } - for (let i = 0; i < 2; i++) { - runRecordResult(store, { now, success: false }); - } + const [circuitState, stateChanged] = runRecordResult(store, { + consumedTokens: 1, + processingFailures: 1, + }); - const [ok, state] = runRecordResult(store, { now, success: true }); - expect(ok).toBe(1); - expect(state).toBe("closed"); + expect(circuitState).toBe("closed"); + expect(stateChanged).toBe(0); }); }); - describe("circuit opening", () => { - it("opens circuit when error rate exceeds threshold", () => { + describe("recording guard — fully open", () => { + it("does not record attempts/failures when circuit is fully open", () => { const store = createRedisStore(); const now = 1_000_000; + const switchedAt = now - 10_000; - for (let i = 0; 
i < 4; i++) { - const [, state] = runRecordResult(store, { - now, - success: false, - cbMinAttempts: 5, - cbErrorThreshold: 0.5, - }); - expect(state).toBe("failed"); - } - - const [ok, state] = runRecordResult(store, { + store.set( + "ep:t1", + new Map([ + ["is_open", "1"], + ["switched_at", switchedAt.toString()], + ["sample_till", "9999999999"], + ["cur_attempts", "0"], + ["cur_failures", "0"], + ]), + ); + + runRecordResult(store, { now, - success: false, - cbMinAttempts: 5, - cbErrorThreshold: 0.5, + cooldownPeriodMs: 120_000, + consumedTokens: 5, + processingFailures: 3, }); - expect(ok).toBe(0); - expect(state).toBe("opened"); + + const epHash = store.get("ep:t1")!; + expect(epHash.get("cur_attempts")).toBe("0"); + expect(epHash.get("cur_failures")).toBe("0"); }); - it("does not open circuit when below minimum attempts", () => { + it("returns open when circuit is fully open and state unchanged", () => { const store = createRedisStore(); const now = 1_000_000; + const switchedAt = now - 10_000; - for (let i = 0; i < 4; i++) { - runRecordResult(store, { - now, - success: false, - cbMinAttempts: 10, - }); - } + store.set( + "ep:t1", + new Map([ + ["is_open", "1"], + ["switched_at", switchedAt.toString()], + ["sample_till", "9999999999"], + ]), + ); - const [ok, state] = runRecordResult(store, { + const [circuitState, stateChanged] = runRecordResult(store, { now, - success: false, - cbMinAttempts: 10, + cooldownPeriodMs: 120_000, + consumedTokens: 1, + processingFailures: 0, }); - expect(ok).toBe(0); - expect(state).toBe("failed"); + + expect(circuitState).toBe("open"); + expect(stateChanged).toBe(0); }); + }); - it("sets opened_until_ms with cooldown on open", () => { + describe("circuit opening", () => { + it("opens circuit when failure rate exceeds threshold", () => { const store = createRedisStore(); - const now = 1_000_000; - const cooldownMs = 30_000; - - for (let i = 0; i < 5; i++) { - runRecordResult(store, { - now, - success: false, - cbMinAttempts: 5, 
- cbErrorThreshold: 0.5, - cooldownMs, - }); - } - - const cbHash = store.get("cb:t1")!; - expect(Number(cbHash.get("opened_until_ms"))).toBe(now + cooldownMs); + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["switched_at", "0"], + ["sample_till", "9999999999"], + ]), + ); + + const [circuitState, stateChanged] = runRecordResult(store, { + consumedTokens: 5, + processingFailures: 5, + minAttempts: 5, + failureThreshold: 0.3, + }); + expect(circuitState).toBe("open"); + expect(stateChanged).toBe(1); }); - it("resets all counters on open", () => { + it("does not open circuit when below minimum attempts", () => { const store = createRedisStore(); - const now = 1_000_000; + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["switched_at", "0"], + ["sample_till", "9999999999"], + ]), + ); - for (let i = 0; i < 5; i++) { - runRecordResult(store, { - now, - success: false, - cbMinAttempts: 5, - cbErrorThreshold: 0.5, - }); - } - - const cbHash = store.get("cb:t1")!; - expect(cbHash.get("cb_failures")).toBe("0"); - expect(cbHash.get("cb_attempts")).toBe("0"); - expect(cbHash.get("cb_window_from")).toBe("0"); - expect(cbHash.get("cb_prev_failures")).toBe("0"); - expect(cbHash.get("cb_prev_attempts")).toBe("0"); + const [circuitState, stateChanged] = runRecordResult(store, { + consumedTokens: 3, + processingFailures: 3, + minAttempts: 5, + failureThreshold: 0.3, + }); + expect(circuitState).toBe("closed"); + expect(stateChanged).toBe(0); }); - it("does not double-trip when circuit is already open", () => { + it("sets is_open and switched_at on open", () => { const store = createRedisStore(); const now = 1_000_000; - const openedUntil = now + 60_000; - store.set( - "cb:t1", + "ep:t1", new Map([ - ["opened_until_ms", openedUntil.toString()], - ["cb_window_from", now.toString()], + ["is_open", "0"], + ["switched_at", "0"], + ["sample_till", "9999999999"], ]), ); - for (let i = 0; i < 20; i++) { - const [, state] = runRecordResult(store, { - now, - success: 
false, - cbMinAttempts: 5, - cbErrorThreshold: 0.5, - }); - expect(state).toBe("failed"); - } - - const cbHash = store.get("cb:t1")!; - expect(Number(cbHash.get("opened_until_ms"))).toBe(openedUntil); + runRecordResult(store, { + now, + consumedTokens: 5, + processingFailures: 5, + minAttempts: 5, + failureThreshold: 0.3, + }); + + const epHash = store.get("ep:t1")!; + expect(epHash.get("is_open")).toBe("1"); + expect(Number(epHash.get("switched_at"))).toBe(now); }); - }); - describe("two-window blended rate", () => { - it("blends previous window failures into current assessment", () => { + it("resets all counters and sets sampleTill on open", () => { const store = createRedisStore(); const now = 1_000_000; - const cbWindowPeriodMs = 60_000; - + const samplePeriodMs = 300_000; store.set( - "cb:t1", + "ep:t1", new Map([ - ["cb_window_from", now.toString()], - ["cb_prev_failures", "8"], - ["cb_prev_attempts", "10"], + ["is_open", "0"], + ["switched_at", "0"], + ["sample_till", "9999999999"], ]), ); - const [ok, state] = runRecordResult(store, { + runRecordResult(store, { now, - success: false, - cbWindowPeriodMs, - cbMinAttempts: 5, - cbErrorThreshold: 0.5, + consumedTokens: 5, + processingFailures: 5, + minAttempts: 5, + failureThreshold: 0.3, + samplePeriodMs, }); - expect(ok).toBe(0); - expect(state).toBe("opened"); + + const epHash = store.get("ep:t1")!; + expect(epHash.get("cur_failures")).toBe("0"); + expect(epHash.get("cur_attempts")).toBe("0"); + expect(epHash.get("prev_failures")).toBe("0"); + expect(epHash.get("prev_attempts")).toBe("0"); + expect(Number(epHash.get("sample_till"))).toBe(now + samplePeriodMs); }); + }); - it("reduces previous window weight as current window ages", () => { + describe("circuit closing — half-open with successes", () => { + it("closes circuit when half-open and batch has successes", () => { const store = createRedisStore(); - const cbWindowPeriodMs = 100_000; - const t0 = 1_000_000; - const nearEnd = t0 + cbWindowPeriodMs - 1; 
+ const now = 1_000_000; + const switchedAt = now - 130_000; store.set( - "cb:t1", + "ep:t1", new Map([ - ["cb_window_from", t0.toString()], - ["cb_prev_failures", "10"], - ["cb_prev_attempts", "10"], + ["is_open", "1"], + ["switched_at", switchedAt.toString()], + ["sample_till", "9999999999"], ]), ); - for (let i = 0; i < 20; i++) { - runRecordResult(store, { - now: nearEnd, - success: true, - cbWindowPeriodMs, - cbMinAttempts: 5, - cbErrorThreshold: 0.5, - }); - } - - const [, state] = runRecordResult(store, { - now: nearEnd, - success: false, - cbWindowPeriodMs, - cbMinAttempts: 5, - cbErrorThreshold: 0.5, + const [circuitState, stateChanged] = runRecordResult(store, { + now, + cooldownPeriodMs: 120_000, + consumedTokens: 1, + processingFailures: 0, }); - expect(state).toBe("failed"); + + expect(circuitState).toBe("closed_recovery"); + expect(stateChanged).toBe(1); + + const epHash = store.get("ep:t1")!; + expect(epHash.get("is_open")).toBe("0"); + expect(Number(epHash.get("switched_at"))).toBe(now); }); - it("ignores previous window when cbWindowPeriodMs is 0", () => { + it("does not close when half-open but all attempts failed", () => { const store = createRedisStore(); const now = 1_000_000; + const switchedAt = now - 130_000; store.set( - "cb:t1", + "ep:t1", new Map([ - ["cb_window_from", now.toString()], - ["cb_prev_failures", "100"], - ["cb_prev_attempts", "100"], + ["is_open", "1"], + ["switched_at", switchedAt.toString()], + ["sample_till", "9999999999"], ]), ); - const [, state] = runRecordResult(store, { + const [circuitState, stateChanged] = runRecordResult(store, { now, - success: false, - cbWindowPeriodMs: 0, - cbMinAttempts: 5, - cbErrorThreshold: 0.5, + cooldownPeriodMs: 120_000, + consumedTokens: 1, + processingFailures: 1, }); - expect(state).toBe("failed"); + + expect(circuitState).toBe("half_open"); + expect(stateChanged).toBe(0); }); }); - describe("decay period", () => { - it("preserves opened_until_ms during active decay", () => { + 
describe("sliding window management", () => { + it("promotes current to previous when sampleTill expires", () => { const store = createRedisStore(); - const openedUntil = 1_060_000; - const duringDecay = openedUntil + 100_000; + const now = 1_000_000; + const samplePeriodMs = 300_000; + const sampleTill = now - 1; store.set( - "cb:t1", - new Map([["opened_until_ms", openedUntil.toString()]]), + "ep:t1", + new Map([ + ["is_open", "0"], + ["switched_at", "0"], + ["sample_till", sampleTill.toString()], + ["cur_attempts", "10"], + ["cur_failures", "3"], + ["prev_attempts", "0"], + ["prev_failures", "0"], + ]), ); - runRecordResult(store, { - now: duringDecay, - success: true, - decayPeriodMs: 300_000, - }); + runRecordResult(store, { now, samplePeriodMs, consumedTokens: 1 }); - const cbHash = store.get("cb:t1")!; - expect(Number(cbHash.get("opened_until_ms"))).toBe(openedUntil); + const epHash = store.get("ep:t1")!; + expect(epHash.get("prev_attempts")).toBe("10"); + expect(epHash.get("prev_failures")).toBe("3"); + expect(Number(epHash.get("sample_till"))).toBe( + sampleTill + samplePeriodMs, + ); }); - it("clears opened_until_ms after decay period elapses", () => { + it("complete reset when window is too old", () => { const store = createRedisStore(); - const openedUntil = 1_060_000; - const decayPeriodMs = 300_000; - const afterDecay = openedUntil + decayPeriodMs + 1; + const now = 1_000_000; + const samplePeriodMs = 300_000; + const sampleTill = now - samplePeriodMs - 1; store.set( - "cb:t1", - new Map([["opened_until_ms", openedUntil.toString()]]), + "ep:t1", + new Map([ + ["is_open", "0"], + ["switched_at", "0"], + ["sample_till", sampleTill.toString()], + ["cur_attempts", "10"], + ["cur_failures", "3"], + ["prev_attempts", "5"], + ["prev_failures", "2"], + ]), ); - runRecordResult(store, { - now: afterDecay, - success: true, - decayPeriodMs, - }); + runRecordResult(store, { now, samplePeriodMs, consumedTokens: 1 }); - const cbHash = store.get("cb:t1")!; - 
expect(cbHash.get("opened_until_ms")).toBe("0"); + const epHash = store.get("ep:t1")!; + expect(epHash.get("prev_attempts")).toBe("0"); + expect(epHash.get("prev_failures")).toBe("0"); + expect(Number(epHash.get("sample_till"))).toBe(now + samplePeriodMs); }); - it("clears opened_until_ms when circuit was never opened", () => { + it("interpolates using weight from sampleTill", () => { const store = createRedisStore(); + const samplePeriodMs = 300_000; const now = 1_000_000; + const sampleTill = now + samplePeriodMs; - runRecordResult(store, { now, success: true }); + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["switched_at", "0"], + ["sample_till", sampleTill.toString()], + ["prev_attempts", "10"], + ["prev_failures", "10"], + ]), + ); - const cbHash = store.get("cb:t1")!; - expect(cbHash.get("opened_until_ms")).toBe("0"); + // weight = (sampleTill - now) / samplePeriodMs = 1.0 + // interpolated attempts = 10 * 1.0 + 5 = 15 (>= minAttempts 5) + // interpolated failures = 10 * 1.0 + 5 = 15 + // failure rate = 15/15 = 1.0 > 0.3 → opens + const [circuitState, stateChanged] = runRecordResult(store, { + now, + samplePeriodMs, + consumedTokens: 5, + processingFailures: 5, + minAttempts: 5, + failureThreshold: 0.3, + }); + expect(circuitState).toBe("open"); + expect(stateChanged).toBe(1); }); }); describe("state persistence", () => { - it("writes all counter fields to redis", () => { + it("writes all sampling fields to redis", () => { const store = createRedisStore(); + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["switched_at", "0"], + ["sample_till", "9999999999"], + ]), + ); runRecordResult(store); - const cbHash = store.get("cb:t1")!; - expect(cbHash.has("opened_until_ms")).toBe(true); - expect(cbHash.has("cb_window_from")).toBe(true); - expect(cbHash.has("cb_failures")).toBe(true); - expect(cbHash.has("cb_attempts")).toBe(true); - expect(cbHash.has("cb_prev_failures")).toBe(true); - expect(cbHash.has("cb_prev_attempts")).toBe(true); + const 
epHash = store.get("ep:t1")!; + expect(epHash.has("cur_attempts")).toBe(true); + expect(epHash.has("cur_failures")).toBe(true); + expect(epHash.has("prev_attempts")).toBe(true); + expect(epHash.has("prev_failures")).toBe(true); + expect(epHash.has("sample_till")).toBe(true); }); }); }); diff --git a/lambdas/https-client-lambda/src/handler.ts b/lambdas/https-client-lambda/src/handler.ts index 28fcc6b9..19420462 100644 --- a/lambdas/https-client-lambda/src/handler.ts +++ b/lambdas/https-client-lambda/src/handler.ts @@ -12,7 +12,6 @@ import { OUTCOME_SUCCESS, deliverPayload, } from "services/delivery/https-client"; -import type { DeliveryResult } from "services/delivery/https-client"; import { sendToDlq } from "services/dlq-sender"; import { changeVisibility } from "services/sqs-visibility"; import { @@ -26,7 +25,6 @@ import { recordResult, } from "services/endpoint-gate"; import { getRedisClient } from "services/redis-client"; -import { VisibilityManagedError } from "services/visibility-managed-error"; import { recordAdmissionDenied, recordCircuitBreakerClosed, @@ -47,13 +45,20 @@ const DEFAULT_MAX_RETRY_DURATION_MS = 7_200_000; // 2 hours const DEFAULT_CONCURRENCY_LIMIT = 5; const gateConfig: EndpointGateConfig = { - burstCapacity: Number(process.env.TOKEN_BUCKET_BURST_CAPACITY ?? "10"), - cbProbeIntervalMs: Number(process.env.CB_PROBE_INTERVAL_MS ?? "60000"), - decayPeriodMs: Number(process.env.CB_DECAY_PERIOD_MS ?? "300000"), - cbWindowPeriodMs: Number(process.env.CB_WINDOW_PERIOD_MS ?? "60000"), - cbErrorThreshold: Number(process.env.CB_ERROR_THRESHOLD ?? "0.5"), - cbMinAttempts: Number(process.env.CB_MIN_ATTEMPTS ?? "10"), - cbCooldownMs: Number(process.env.CB_COOLDOWN_MS ?? "60000"), + // Max tokens the bucket can hold — absorbs short traffic bursts without throttling (default: 2250) + burstCapacity: Number(process.env.TOKEN_BUCKET_BURST_CAPACITY ?? 
"2250"), + // Probe rate to test endpoint recovery when half-open (default: 1/60 req/s) + probeRateLimit: Number(process.env.CB_PROBE_RATE_LIMIT ?? String(1 / 60)), + // Linear ramp-up after circuit closes, avoids flooding a freshly recovered endpoint (default: 10 min) + recoveryPeriodMs: Number(process.env.CB_RECOVERY_PERIOD_MS ?? "600000"), + // Sliding window over which failure rates are sampled (default: 5 min) + samplePeriodMs: Number(process.env.CB_SAMPLE_PERIOD_MS ?? "300000"), + // Failure rate within the sample window that triggers circuit open (default: 30%) + failureThreshold: Number(process.env.CB_FAILURE_THRESHOLD ?? "0.3"), + // Minimum attempts in the sample window before the failure rate is evaluated (default: 5 attempts) + minAttempts: Number(process.env.CB_MIN_ATTEMPTS ?? "5"), + // Full block after circuit opens, before half-open probes begin (default: 2 min) + cooldownPeriodMs: Number(process.env.CB_COOLDOWN_PERIOD_MS ?? "120000"), }; type CallbackDeliveryMessage = { @@ -62,223 +67,319 @@ type CallbackDeliveryMessage = { targetId: string; }; -async function checkAdmission( - redis: RedisClientType, - targetId: string, - invocationRateLimit: number, - cbEnabled: boolean, - clientId: string, - record: SQSRecord, - correlationId?: string, -): Promise { - const gateResult = await admit( - redis, - targetId, - invocationRateLimit, - cbEnabled, - gateConfig, - ); +type TargetBatch = { + targetId: string; + records: SQSRecord[]; + messages: CallbackDeliveryMessage[]; +}; - if (!gateResult.allowed) { - const delaySec = Math.ceil(gateResult.retryAfterMs / 1000); - recordAdmissionDenied(clientId, targetId, gateResult.reason, correlationId); - await changeVisibility(record.receiptHandle, delaySec); - throw new VisibilityManagedError(`Admission denied: ${gateResult.reason}`); +function groupByTarget(records: SQSRecord[]): TargetBatch[] { + const groups = new Map< + string, + { records: SQSRecord[]; messages: CallbackDeliveryMessage[] } + >(); + + for (const 
record of records) { + const message: CallbackDeliveryMessage = JSON.parse(record.body); + const existing = groups.get(message.targetId); + if (existing) { + existing.records.push(record); + existing.messages.push(message); + } else { + groups.set(message.targetId, { records: [record], messages: [message] }); + } } + + return [...groups.entries()].map( + ([targetId, { messages, records: recs }]) => ({ + targetId, + records: recs, + messages, + }), + ); } -const OUTCOME_DELIVERED = "delivered" as const; -const OUTCOME_DLQ = "dlq" as const; -type RecordOutcome = typeof OUTCOME_DELIVERED | typeof OUTCOME_DLQ; +function extractCorrelationId( + message: CallbackDeliveryMessage, +): string | undefined { + return message.payload.data[0]?.attributes?.messageId; +} -async function handleDeliveryResult( - result: DeliveryResult, +async function deliverRecord( record: SQSRecord, - redis: RedisClientType, + message: CallbackDeliveryMessage, + target: Awaited>, + applicationId: string, clientId: string, - targetId: string, - cbEnabled: boolean, - correlationId?: string, -): Promise { +): Promise<{ success: boolean; dlq: boolean }> { + const correlationId = extractCorrelationId(message); + + const maxRetryDurationMs = + target.delivery?.maxRetryDurationSeconds === undefined + ? 
DEFAULT_MAX_RETRY_DURATION_MS + : target.delivery.maxRetryDurationSeconds * 1000; + + const firstReceivedMs = Number( + record.attributes.ApproximateFirstReceiveTimestamp, + ); + + if (isWindowExhausted(firstReceivedMs, maxRetryDurationMs)) { + recordRetryWindowExhausted(clientId, message.targetId, correlationId); + await sendToDlq(record.body); + return { success: true, dlq: true }; + } + + const agent = await buildAgent(target); + const signature = signPayload( + applicationId, + target.apiKey.headerValue, + message.payload, + ); + const payloadJson = JSON.stringify(message.payload); + + recordDeliveryAttempt( + clientId, + message.targetId, + correlationId, + record.messageId, + Number(record.attributes.ApproximateReceiveCount), + ); + const deliveryStart = Date.now(); + const result = await deliverPayload(target, payloadJson, signature, agent); + recordDeliveryDuration(message.targetId, Date.now() - deliveryStart); + if (result.outcome === OUTCOME_SUCCESS) { - if (cbEnabled) { - const cbOutcome = await recordResult(redis, targetId, true, gateConfig); - if (cbOutcome.ok && cbOutcome.state === "closed") { - recordCircuitBreakerClosed(targetId, correlationId); - } - } - recordDeliverySuccess(clientId, targetId, correlationId); - return OUTCOME_DELIVERED; + recordDeliverySuccess(clientId, message.targetId, correlationId); + return { success: true, dlq: false }; } if (result.outcome === OUTCOME_PERMANENT_FAILURE) { recordDeliveryPermanentFailure( clientId, - targetId, + message.targetId, result.statusCode, result.errorCode, correlationId, ); await sendToDlq(record.body, result); - return OUTCOME_DLQ; + return { success: true, dlq: true }; } if (result.outcome === OUTCOME_RATE_LIMITED) { const receiveCount = Number(record.attributes.ApproximateReceiveCount); - recordDeliveryRateLimited(clientId, targetId, correlationId); + recordDeliveryRateLimited(clientId, message.targetId, correlationId); await handleRateLimitedRecord( record, clientId, - targetId, + 
message.targetId, result.retryAfterHeader, receiveCount, ); - return OUTCOME_DELIVERED; // unreachable — handleRateLimitedRecord always throws + return { success: true, dlq: false }; } const receiveCount = Number(record.attributes.ApproximateReceiveCount); const backoffSec = jitteredBackoffSeconds(receiveCount); - if (cbEnabled) { - const cbOutcome = await recordResult(redis, targetId, false, gateConfig); - if (cbOutcome.state === "opened") { - recordCircuitBreakerOpen(targetId, correlationId); - } - } recordDeliveryFailure( clientId, - targetId, + message.targetId, result.statusCode, backoffSec, receiveCount, correlationId, ); await changeVisibility(record.receiptHandle, backoffSec); - throw new VisibilityManagedError(`Transient failure: ${result.statusCode}`); + return { success: false, dlq: false }; } -async function processRecord( - record: SQSRecord, - redis: RedisClientType, -): Promise { - const { CLIENT_ID } = process.env; - if (!CLIENT_ID) { - throw new Error("CLIENT_ID is required"); - } - - const message: CallbackDeliveryMessage = JSON.parse(record.body); - const { payload, targetId } = message; - const messageId = payload.data[0]?.attributes?.messageId; - - logger.info("Processing delivery", { - clientId: CLIENT_ID, - targetId, - messageId, - sqsMessageId: record.messageId, - receiveCount: record.attributes.ApproximateReceiveCount, - }); - - const target = await loadTargetConfig(CLIENT_ID, targetId); - const maxRetryDurationMs = - target.delivery?.maxRetryDurationSeconds === undefined - ? 
DEFAULT_MAX_RETRY_DURATION_MS - : target.delivery.maxRetryDurationSeconds * 1000; - - const firstReceivedMs = Number( - record.attributes.ApproximateFirstReceiveTimestamp, - ); +type TargetBatchResult = { + failures: SQSBatchItemFailure[]; + deliveredCount: number; + dlqCount: number; +}; - if (isWindowExhausted(firstReceivedMs, maxRetryDurationMs)) { - recordRetryWindowExhausted(CLIENT_ID, targetId, messageId); - await sendToDlq(record.body); - return OUTCOME_DLQ; +async function handleBatchDenied( + batch: TargetBatch, + clientId: string, + reason: string, + retryAfterMs: number, +): Promise { + const delaySec = Math.ceil(retryAfterMs / 1000); + const correlationIds = batch.messages.map((m) => extractCorrelationId(m)); + recordAdmissionDenied(clientId, batch.targetId, reason, correlationIds); + const failures: SQSBatchItemFailure[] = []; + for (const record of batch.records) { + // eslint-disable-next-line sonarjs/pseudo-random -- jitter for backoff, not security-sensitive + const jitterSec = Math.floor(Math.random() * 5); + await changeVisibility(record.receiptHandle, delaySec + jitterSec); + failures.push({ itemIdentifier: record.messageId }); } + return { failures, deliveredCount: 0, dlqCount: 0 }; +} - const applicationId = await getApplicationId(CLIENT_ID); +async function processTargetBatch( + batch: TargetBatch, + redis: RedisClientType, + clientId: string, + concurrencyLimit: number, +): Promise { + const target = await loadTargetConfig(clientId, batch.targetId); const cbEnabled = target.delivery?.circuitBreaker?.enabled ?? 
false; - await checkAdmission( + const gateResult = await admit( redis, - targetId, + batch.targetId, target.invocationRateLimit, cbEnabled, - CLIENT_ID, - record, - messageId, + batch.records.length, + gateConfig, ); - const agent = await buildAgent(target); - const signature = signPayload( - applicationId, - target.apiKey.headerValue, - payload, - ); - const payloadJson = JSON.stringify(payload); + if (!gateResult.allowed) { + return handleBatchDenied( + batch, + clientId, + gateResult.reason, + gateResult.retryAfterMs, + ); + } - recordDeliveryAttempt(CLIENT_ID, targetId, messageId); - const deliveryStart = Date.now(); - const result = await deliverPayload(target, payloadJson, signature, agent); - recordDeliveryDuration(targetId, Date.now() - deliveryStart); + const { consumedTokens } = gateResult; + const admitted = batch.records.slice(0, consumedTokens); + const rejected = batch.records.slice(consumedTokens); + const admittedMessages = batch.messages.slice(0, consumedTokens); - return handleDeliveryResult( - result, - record, - redis, - CLIENT_ID, - targetId, - cbEnabled, - messageId, + const applicationId = await getApplicationId(clientId); + + const failures: SQSBatchItemFailure[] = []; + let processingFailures = 0; + + const deliveryResults = await pMap( + admitted, + async ( + record, + index, + ): Promise<{ record: SQSRecord; success: boolean; dlq: boolean }> => { + try { + const outcome = await deliverRecord( + record, + admittedMessages[index], + target, + applicationId, + clientId, + ); + return { record, success: outcome.success, dlq: outcome.dlq }; + } catch (error) { + const correlationId = extractCorrelationId(admittedMessages[index]); + logger.error("Failed to process record", { + messageId: record.messageId, + correlationId, + err: error, + }); + const receiveCount = Number(record.attributes.ApproximateReceiveCount); + await changeVisibility( + record.receiptHandle, + jitteredBackoffSeconds(receiveCount), + ); + return { record, success: false, 
dlq: false }; + } + }, + { concurrency: concurrencyLimit }, ); + + for (const { record, success } of deliveryResults) { + if (!success) { + processingFailures += 1; + failures.push({ itemIdentifier: record.messageId }); + } + } + + const deliveredCount = deliveryResults.filter( + (r) => r.success && !r.dlq, + ).length; + const dlqCount = deliveryResults.filter((r) => r.dlq).length; + + if (cbEnabled && consumedTokens > 0) { + const cbOutcome = await recordResult( + redis, + batch.targetId, + consumedTokens, + processingFailures, + gateConfig, + ); + if (cbOutcome.stateChanged && cbOutcome.circuitState === "open") { + recordCircuitBreakerOpen(batch.targetId); + } + if ( + cbOutcome.stateChanged && + cbOutcome.circuitState === "closed_recovery" + ) { + recordCircuitBreakerClosed(batch.targetId); + } + } + + if (rejected.length > 0) { + const rejectedMessages = batch.messages.slice(consumedTokens); + const rejectedCorrelationIds = rejectedMessages.map((m) => + extractCorrelationId(m), + ); + recordAdmissionDenied( + clientId, + batch.targetId, + "rate_limited", + rejectedCorrelationIds, + ); + for (const record of rejected) { + await changeVisibility(record.receiptHandle, 1); + failures.push({ itemIdentifier: record.messageId }); + } + } + + return { failures, deliveredCount, dlqCount }; } export async function processRecords( records: SQSRecord[], ): Promise { - resetMetrics(); + const { CLIENT_ID } = process.env; + if (!CLIENT_ID) { + throw new Error("CLIENT_ID is required"); + } - logger.info("Batch received", { batchSize: records.length }); + resetMetrics(); const concurrencyLimit = Number( process.env.CONCURRENCY_LIMIT ?? 
String(DEFAULT_CONCURRENCY_LIMIT), ); + logger.info("Batch received", { batchSize: records.length }); + const redis = await getRedisClient(); + const targetBatches = groupByTarget(records); - const results = await pMap( - records, - async (record): Promise => { - try { - return await processRecord(record, redis); - } catch (error) { - if (!(error instanceof VisibilityManagedError)) { - logger.error("Failed to process record", { - messageId: record.messageId, - err: error, - }); - const receiveCount = Number( - record.attributes.ApproximateReceiveCount, - ); - await changeVisibility( - record.receiptHandle, - jitteredBackoffSeconds(receiveCount), - ); - } - return { itemIdentifier: record.messageId }; - } - }, - { concurrency: concurrencyLimit }, - ); + const allFailures: SQSBatchItemFailure[] = []; + let totalDelivered = 0; + let totalDlq = 0; + + for (const batch of targetBatches) { + const batchResult = await processTargetBatch( + batch, + redis, + CLIENT_ID, + concurrencyLimit, + ); + allFailures.push(...batchResult.failures); + totalDelivered += batchResult.deliveredCount; + totalDlq += batchResult.dlqCount; + } - await flushMetrics(); - const failures = results.filter( - (r): r is SQSBatchItemFailure => typeof r === "object", - ); - const deliveredCount = results.filter((r) => r === OUTCOME_DELIVERED).length; - const dlqCount = results.filter((r) => r === OUTCOME_DLQ).length; logger.info("Batch complete", { batchSize: records.length, - deliveredCount, - dlqCount, - failureCount: failures.length, + deliveredCount: totalDelivered, + dlqCount: totalDlq, + failureCount: allFailures.length, }); - return failures; + + await flushMetrics(); + return allFailures; } diff --git a/lambdas/https-client-lambda/src/services/admit.lua b/lambdas/https-client-lambda/src/services/admit.lua index fd56decb..ed15a928 100644 --- a/lambdas/https-client-lambda/src/services/admit.lua +++ b/lambdas/https-client-lambda/src/services/admit.lua @@ -1,203 +1,114 @@ --- admit.lua — Decides 
whether a request to an endpoint is allowed. +-- admit.lua — Pre-processing: determines rate limit and consumes tokens. -- --- Three sequential checks run atomically: --- 1. Circuit breaker — is the endpoint currently healthy? --- 2. Sliding window — roll the two-window error-rate accounting state if needed --- 3. Token bucket — is the endpoint within its rate limit? +-- Two sequential steps run atomically: +-- 1. Circuit breaker — determine effective rate from circuit state +-- 2. Token bucket — consume tokens for the target batch -- --- A request is allowed only when all three checks pass. +-- The circuit has four states: +-- Open (during cooldown): rate = 0, complete block, bucket untouched +-- Half-open (after cooldown): rate = probeRateLimit +-- Recovering (closed, during recovery period): linear ramp-up +-- Normal (closed): full configured rate -- --- While the circuit is open, a timed probe is let through at most once per --- cbProbeIntervalMs so the caller can test whether the endpoint has recovered. --- The probe bypasses the rate limit — counting it here would skew a --- low-volume probe signal against the recovery decision. --- --- After the circuit closes, the token fill rate ramps up linearly from --- near-zero to full over decayPeriodMs to avoid a thundering herd on recovery. 
--- --- Returns: { allowed (0|1), reason, retryAfterMs, effectiveRate } +-- Returns: { consumedTokens, reason, retryAfterMs, effectiveRate } -- Keys -local cbKey = KEYS[1] -- cb:{endpoint} circuit breaker state hash -local rlKey = KEYS[2] -- rl:{endpoint} rate limiter state hash +local epKey = KEYS[1] -- ep:{targetId} combined endpoint state hash -- Arguments -local now = tonumber(ARGV[1]) or 0 -- current wall-clock time (ms) -local capacity = tonumber(ARGV[2]) or 0 -- token bucket maximum capacity -local refillPerSec = tonumber(ARGV[3]) or 0 -- full token fill rate (tokens/sec) -local cooldownMs = tonumber(ARGV[4]) or 0 -- how long the circuit stays open (ms) -local decayPeriodMs = tonumber(ARGV[5]) or 0 -- ramp-up window after circuit closes (ms) -local cbWindowPeriodMs = tonumber(ARGV[6]) or 0 -- error-rate sliding window duration (ms) -local cbProbeIntervalMs = tonumber(ARGV[7]) or 0 -- minimum gap between probe requests (ms; 0 = no probes) - --- TTL policy: circuit breaker state must outlive the cooldown window so that --- the ramp-up period remains visible to subsequent calls after a close. --- Rate limiter state needs only a short idle window. 
-local cbTtlSeconds = math.ceil(cooldownMs / 1000) + 60 -local rlTtlSeconds = 120 +local now = tonumber(ARGV[1]) or 0 +local capacity = tonumber(ARGV[2]) or 0 +local targetRateLimit = tonumber(ARGV[3]) or 0 +local cooldownMs = tonumber(ARGV[4]) or 0 +local recoveryPeriodMs = tonumber(ARGV[5]) or 0 +local probeRateLimit = tonumber(ARGV[6]) or 0 +local targetBatchSize = tonumber(ARGV[7]) or 0 +local cbEnabled = tonumber(ARGV[8]) == 1 -------------------------------------------------------------------------------- -- LOAD STATE -------------------------------------------------------------------------------- -local cb = redis.call("HMGET", cbKey, - "opened_until_ms", "cb_window_from", "cb_failures", "cb_attempts", "last_probe_ms", - "cb_prev_failures", "cb_prev_attempts") -local openedUntil = tonumber(cb[1] or "0") -local cbWindowFrom = tonumber(cb[2] or "0") -local cbFailures = tonumber(cb[3] or "0") -local cbAttempts = tonumber(cb[4] or "0") -local lastProbeMs = tonumber(cb[5] or "0") -local cbPrevFailures = tonumber(cb[6] or "0") -local cbPrevAttempts = tonumber(cb[7] or "0") - -local rl = redis.call("HMGET", rlKey, "tokens", "last_refill_ms") -local tokens = tonumber(rl[1] or capacity) -local lastRefill = tonumber(rl[2] or now) +local state = redis.call("HMGET", epKey, + "is_open", "switched_at", "bucket_tokens", "bucket_refilled_at") +local cbNeedInit = state[1] == false or state[1] == nil +local rlNeedInit = state[4] == false or state[4] == nil +local isOpen = cbNeedInit or tonumber(state[1]) == 1 +local switchedAt = cbNeedInit and 0 or tonumber(state[2] or "0") +local bucketTokens = tonumber(state[3] or "0") +local bucketRefilledAt = rlNeedInit and now or tonumber(state[4]) + +if not cbEnabled then + isOpen = false + switchedAt = 0 +end -------------------------------------------------------------------------------- --- 1. CIRCUIT BREAKER --- --- The circuit is open when openedUntil is set and has not yet elapsed. 
--- All requests are rejected while open to give the endpoint time to recover. --- --- Timed probes: once per cbProbeIntervalMs a single request is allowed --- through even while the circuit is open. The caller must record the --- outcome via record-result.lua; a successful probe will close the circuit --- and trigger the ramp-up phase. +-- 1. CIRCUIT BREAKER — determine effective rate -------------------------------------------------------------------------------- -if openedUntil > 0 and now < openedUntil then - -- Allow a probe through if the probe interval has elapsed - if cbProbeIntervalMs > 0 and (now - lastProbeMs) >= cbProbeIntervalMs then - lastProbeMs = now - redis.call("HSET", cbKey, - "opened_until_ms", openedUntil, - "cb_window_from", cbWindowFrom, - "cb_failures", cbFailures, - "cb_attempts", cbAttempts, - "last_probe_ms", lastProbeMs, - "cb_prev_failures", cbPrevFailures, - "cb_prev_attempts", cbPrevAttempts - ) - redis.call("EXPIRE", cbKey, cbTtlSeconds) - return { 1, "probe", 0, 0 } - end - - -- Circuit is open and no probe slot is available — reject - return { 0, "circuit_open", openedUntil - now, 0 } -end +local isHalfOpen = isOpen and now > switchedAt + cooldownMs +local isRecovering = (not isOpen) and now < switchedAt + recoveryPeriodMs --------------------------------------------------------------------------------- --- 2. SLIDING WINDOW --- --- Two windows (current + previous) together approximate a sliding window over --- cbWindowPeriodMs. When the current window expires it is promoted to previous --- and a fresh current window starts. record-result.lua blends the two windows --- using a time-based weight to smooth the error rate across the boundary rather --- than resetting it to zero at expiry. --- --- record-result.lua is responsible for incrementing the counters; this script --- is only responsible for rolling the window boundary forward when it expires. 
--------------------------------------------------------------------------------- +local effectiveRate -if cbWindowFrom == 0 then - -- No window exists yet — start one now - cbWindowFrom = now -elseif (now - cbWindowFrom) > cbWindowPeriodMs then - -- Current window has expired — roll it forward - if (now - cbWindowFrom) > (2 * cbWindowPeriodMs) then - -- Both current and previous windows are stale: a long quiet period means - -- old failure counts are no longer relevant to the health of the endpoint. - cbPrevFailures = 0 - cbPrevAttempts = 0 +if isOpen then + if isHalfOpen then + effectiveRate = probeRateLimit else - -- Promote current → previous so it can be blended with the new current window - cbPrevFailures = cbFailures - cbPrevAttempts = cbAttempts + return { 0, "circuit_open", (switchedAt + cooldownMs) - now, 0 } + end +else + if isRecovering then + local rampRange = math.max(0, targetRateLimit - probeRateLimit) + local rampProgress = math.max(0, now - switchedAt) / recoveryPeriodMs + effectiveRate = probeRateLimit + rampProgress * rampRange + else + effectiveRate = targetRateLimit end - cbFailures = 0 - cbAttempts = 0 - cbWindowFrom = now end -------------------------------------------------------------------------------- --- 3. TOKEN BUCKET +-- 2. TOKEN BUCKET — batch consumption +-- +-- Generate tokens based on elapsed time, then consume as many as needed for +-- the batch, up to the number available. -- --- Refills tokens based on elapsed time, then tries to consume one. --- If no tokens are available the request is rate-limited. +-- bucketRefilledAt tracks the point in time up to which tokens have been +-- generated. We advance it by exactly the time needed to produce the whole +-- tokens we generated (generationTime), rather than setting it to `now`. -- --- Ramp-up: after the circuit closes (openedUntil is set but in the past), --- effectiveRate scales linearly from near-zero to the full refillPerSec over --- decayPeriodMs. 
This deliberately slows recovery traffic so a flapping --- endpoint is not immediately overwhelmed. --- Once decayPeriodMs elapses, openedUntil is cleared and the full rate resumes. +-- Why not `now`? Token generation uses floor(), so any sub-token fractional +-- time is truncated. Setting bucketRefilledAt = now would discard that +-- remainder, meaning the next call starts its elapsed-time calculation from +-- a later point than it should. Over many calls this causes token leakage — +-- the bucket refills slower than the configured rate. By advancing only by +-- generationTime, the leftover fractional time carries over to the next call. -------------------------------------------------------------------------------- -local effectiveRate = refillPerSec - -if openedUntil > 0 and now > openedUntil and decayPeriodMs > 0 then - -- Circuit has recently closed — apply linear ramp-up - local sinceClose = now - openedUntil - if sinceClose >= decayPeriodMs then - -- Decay period fully elapsed — restore full rate and clear the CB timestamp - openedUntil = 0 - else - -- Still within decay period — scale fill rate proportionally to time elapsed - local fraction = sinceClose / decayPeriodMs - effectiveRate = math.max(1, math.floor(refillPerSec * fraction)) - end +if isOpen then + bucketTokens = 0 end --- Refill tokens based on time elapsed since last refill -local elapsed = now - lastRefill -if elapsed > 0 then - local refill = math.floor((elapsed * effectiveRate) / 1000) - if refill > 0 then - tokens = math.min(capacity, tokens + refill) - lastRefill = now - end -end +local generatedTokens = math.floor((now - bucketRefilledAt) * effectiveRate / 1000) +local availTokens = math.min(capacity, bucketTokens + generatedTokens) +local consumedTokens = math.min(targetBatchSize, availTokens) --- Not enough tokens — rate-limited --- TTL is intentionally not refreshed here; it was set on the last allowed call. 
-if tokens < 1 then - redis.call("HSET", cbKey, - "opened_until_ms", openedUntil, - "cb_window_from", cbWindowFrom, - "cb_failures", cbFailures, - "cb_attempts", cbAttempts, - "cb_prev_failures", cbPrevFailures, - "cb_prev_attempts", cbPrevAttempts - ) - redis.call("HSET", rlKey, - "tokens", tokens, - "last_refill_ms", lastRefill - ) - return { 0, "rate_limited", 1000, effectiveRate } +bucketTokens = availTokens - consumedTokens +if generatedTokens > 0 and effectiveRate > 0 then + local generationTime = generatedTokens * 1000 / effectiveRate + bucketRefilledAt = bucketRefilledAt + generationTime end --- Consume one token -tokens = tokens - 1 - -------------------------------------------------------------------------------- --- 4. PERSIST STATE AND ALLOW +-- 3. PERSIST STATE AND RETURN -------------------------------------------------------------------------------- -redis.call("HSET", cbKey, - "opened_until_ms", openedUntil, - "cb_window_from", cbWindowFrom, - "cb_failures", cbFailures, - "cb_attempts", cbAttempts, - "cb_prev_failures", cbPrevFailures, - "cb_prev_attempts", cbPrevAttempts +redis.call("HSET", epKey, + "bucket_tokens", bucketTokens, + "bucket_refilled_at", bucketRefilledAt ) -redis.call("HSET", rlKey, - "tokens", tokens, - "last_refill_ms", lastRefill -) - -redis.call("EXPIRE", cbKey, cbTtlSeconds) -redis.call("EXPIRE", rlKey, rlTtlSeconds) -return { 1, "allowed", 0, effectiveRate } +local reason = consumedTokens < 1 and "rate_limited" or "some_allowed" +local retryAfter = consumedTokens < 1 and 1000 or 0 +return { consumedTokens, reason, retryAfter, effectiveRate } diff --git a/lambdas/https-client-lambda/src/services/delivery-metrics.ts b/lambdas/https-client-lambda/src/services/delivery-metrics.ts index 68248591..dcefaf57 100644 --- a/lambdas/https-client-lambda/src/services/delivery-metrics.ts +++ b/lambdas/https-client-lambda/src/services/delivery-metrics.ts @@ -1,4 +1,8 @@ -import { Unit, createMetricsLogger } from "aws-embedded-metrics"; 
+import { + StorageResolution, + Unit, + createMetricsLogger, +} from "aws-embedded-metrics"; import type { MetricsLogger } from "aws-embedded-metrics"; let metricsInstance: MetricsLogger | undefined; @@ -28,56 +32,90 @@ function getMetrics(): MetricsLogger { export function emitDeliveryAttempt(targetId: string): void { const metrics = getMetrics(); metrics.setProperty("targetId", targetId); - metrics.putMetric("DeliveryAttempt", 1, Unit.Count); + metrics.putMetric("DeliveryAttempt", 1, Unit.Count, StorageResolution.High); } export function emitDeliverySuccess(targetId: string): void { const metrics = getMetrics(); metrics.setProperty("targetId", targetId); - metrics.putMetric("DeliverySuccess", 1, Unit.Count); + metrics.putMetric("DeliverySuccess", 1, Unit.Count, StorageResolution.High); } export function emitDeliveryFailure(targetId: string): void { const metrics = getMetrics(); metrics.setProperty("targetId", targetId); - metrics.putMetric("DeliveryFailure", 1, Unit.Count); + metrics.putMetric("DeliveryFailure", 1, Unit.Count, StorageResolution.High); } export function emitDeliveryPermanentFailure(targetId: string): void { const metrics = getMetrics(); metrics.setProperty("targetId", targetId); - metrics.putMetric("DeliveryPermanentFailure", 1, Unit.Count); + metrics.putMetric( + "DeliveryPermanentFailure", + 1, + Unit.Count, + StorageResolution.High, + ); } export function emitRateLimited(targetId: string): void { const metrics = getMetrics(); metrics.setProperty("targetId", targetId); - metrics.putMetric("DeliveryRateLimited", 1, Unit.Count); + metrics.putMetric( + "DeliveryRateLimited", + 1, + Unit.Count, + StorageResolution.High, + ); } export function emitCircuitBreakerOpen(targetId: string): void { const metrics = getMetrics(); metrics.setProperty("targetId", targetId); - metrics.putMetric("CircuitBreakerOpen", 1, Unit.Count); + metrics.putMetric( + "CircuitBreakerOpen", + 1, + Unit.Count, + StorageResolution.High, + ); } export function 
emitCircuitBreakerClosed(targetId: string): void { const metrics = getMetrics(); metrics.setProperty("targetId", targetId); - metrics.putMetric("CircuitBreakerClosed", 1, Unit.Count); + metrics.putMetric( + "CircuitBreakerClosed", + 1, + Unit.Count, + StorageResolution.High, + ); } export function emitRetryWindowExhausted(targetId: string): void { const metrics = getMetrics(); metrics.setProperty("targetId", targetId); - metrics.putMetric("DeliveryRetryWindowExhausted", 1, Unit.Count); + metrics.putMetric( + "DeliveryRetryWindowExhausted", + 1, + Unit.Count, + StorageResolution.High, + ); } -export function emitAdmissionDenied(targetId: string, reason: string): void { +export function emitAdmissionDenied( + targetId: string, + reason: string, + count = 1, +): void { const metrics = getMetrics(); metrics.setProperty("targetId", targetId); metrics.setProperty("reason", reason); - metrics.putMetric("AdmissionDenied", 1, Unit.Count); + metrics.putMetric( + "AdmissionDenied", + count, + Unit.Count, + StorageResolution.High, + ); } export function emitDeliveryDuration( @@ -86,7 +124,12 @@ export function emitDeliveryDuration( ): void { const metrics = getMetrics(); metrics.setProperty("targetId", targetId); - metrics.putMetric("DeliveryDurationMs", durationMs, Unit.Milliseconds); + metrics.putMetric( + "DeliveryDurationMs", + durationMs, + Unit.Milliseconds, + StorageResolution.High, + ); } export async function flushMetrics(): Promise { diff --git a/lambdas/https-client-lambda/src/services/delivery-observability.ts b/lambdas/https-client-lambda/src/services/delivery-observability.ts index ed41df8a..d722b851 100644 --- a/lambdas/https-client-lambda/src/services/delivery-observability.ts +++ b/lambdas/https-client-lambda/src/services/delivery-observability.ts @@ -16,9 +16,17 @@ export function recordDeliveryAttempt( clientId: string, targetId: string, correlationId?: string, + sqsMessageId?: string, + receiveCount?: number, ): void { emitDeliveryAttempt(targetId); - 
logger.info("Attempting delivery", { clientId, targetId, correlationId }); + logger.info("Attempting delivery", { + clientId, + targetId, + correlationId, + sqsMessageId, + receiveCount, + }); } export function recordDeliverySuccess( @@ -108,14 +116,15 @@ export function recordAdmissionDenied( clientId: string, targetId: string, reason: string, - correlationId?: string, + correlationIds: (string | undefined)[], ): void { - emitAdmissionDenied(targetId, reason); + emitAdmissionDenied(targetId, reason, correlationIds.length); logger.warn("Admission denied", { clientId, targetId, - correlationId, reason, + deniedCount: correlationIds.length, + correlationIds, }); } diff --git a/lambdas/https-client-lambda/src/services/endpoint-gate.ts b/lambdas/https-client-lambda/src/services/endpoint-gate.ts index c2d85439..b96c00eb 100644 --- a/lambdas/https-client-lambda/src/services/endpoint-gate.ts +++ b/lambdas/https-client-lambda/src/services/endpoint-gate.ts @@ -5,7 +5,7 @@ import recordResultLuaSrc from "services/record-result.lua"; export type AdmitResultAllowed = { allowed: true; - probe: boolean; + consumedTokens: number; effectiveRate: number; }; @@ -18,18 +18,21 @@ export type AdmitResultDenied = { export type AdmitResult = AdmitResultAllowed | AdmitResultDenied; -export type RecordResultOutcome = - | { ok: true; state: "closed" } - | { ok: false; state: "opened" | "failed" }; +export type CircuitState = "open" | "half_open" | "closed_recovery" | "closed"; + +export type RecordResultOutcome = { + circuitState: CircuitState; + stateChanged: boolean; +}; export type EndpointGateConfig = { burstCapacity: number; - cbProbeIntervalMs: number; - decayPeriodMs: number; - cbWindowPeriodMs: number; - cbErrorThreshold: number; - cbMinAttempts: number; - cbCooldownMs: number; + probeRateLimit: number; + recoveryPeriodMs: number; + samplePeriodMs: number; + failureThreshold: number; + minAttempts: number; + cooldownPeriodMs: number; }; let admitSha: string | undefined; @@ -76,22 
+79,21 @@ export async function admit( targetId: string, refillPerSec: number, cbEnabled: boolean, + targetBatchSize: number, config: EndpointGateConfig, ): Promise { - const cbKey = `cb:{${targetId}}`; - const rlKey = `rl:{${targetId}}`; + const epKey = `ep:{${targetId}}`; const now = Date.now().toString(); - const probeIntervalMs = cbEnabled ? config.cbProbeIntervalMs.toString() : "0"; const args = [ now, config.burstCapacity.toString(), - // eslint-disable-next-line sonarjs/null-dereference - refillPerSec.toString(), - config.cbCooldownMs.toString(), - config.decayPeriodMs.toString(), - config.cbWindowPeriodMs.toString(), - probeIntervalMs, + String(refillPerSec), + config.cooldownPeriodMs.toString(), + config.recoveryPeriodMs.toString(), + config.probeRateLimit.toString(), + String(targetBatchSize), + cbEnabled ? "1" : "0", ]; if (!admitSha) { @@ -102,16 +104,16 @@ export async function admit( client, admitLuaSrc, admitSha, - [cbKey, rlKey], + [epKey], args, )) as [number, string, number, number]; - const [allowed, reason, retryAfterMs, effectiveRate] = raw; + const [consumedOrFlag, reason, retryAfterMs, effectiveRate] = raw; - if (allowed === 1) { + if (reason === "some_allowed") { return { allowed: true, - probe: reason === "probe", + consumedTokens: Number(consumedOrFlag), effectiveRate: Number(effectiveRate), }; } @@ -127,20 +129,22 @@ export async function admit( export async function recordResult( client: RedisClientType, targetId: string, - success: boolean, + consumedTokens: number, + processingFailures: number, config: EndpointGateConfig, ): Promise { - const cbKey = `cb:{${targetId}}`; + const epKey = `ep:{${targetId}}`; const now = Date.now().toString(); const args = [ now, - success ? 
"1" : "0", - config.cbCooldownMs.toString(), - config.decayPeriodMs.toString(), - config.cbErrorThreshold.toString(), - config.cbMinAttempts.toString(), - config.cbWindowPeriodMs.toString(), + String(consumedTokens), + String(processingFailures), + config.cooldownPeriodMs.toString(), + config.recoveryPeriodMs.toString(), + config.failureThreshold.toString(), + config.minAttempts.toString(), + config.samplePeriodMs.toString(), ]; if (!recordResultSha) { @@ -151,17 +155,16 @@ export async function recordResult( client, recordResultLuaSrc, recordResultSha, - [cbKey], + [epKey], args, - )) as [number, string]; - - const [ok, state] = raw; + )) as [string, number]; - if (ok === 1) { - return { ok: true, state: "closed" }; - } + const [circuitState, stateChanged] = raw; - return { ok: false, state: state as "opened" | "failed" }; + return { + circuitState: circuitState as CircuitState, + stateChanged: stateChanged === 1, + }; } export function resetAdmitSha(): void { diff --git a/lambdas/https-client-lambda/src/services/record-result.lua b/lambdas/https-client-lambda/src/services/record-result.lua index 1cc94857..fa42efea 100644 --- a/lambdas/https-client-lambda/src/services/record-result.lua +++ b/lambdas/https-client-lambda/src/services/record-result.lua @@ -1,144 +1,169 @@ --- record-result.lua — Records the outcome of a delivery attempt. +-- record-result.lua — Post-processing: updates sampling and circuit breaker. -- --- Updates the circuit breaker's error-rate window counters and opens the --- circuit if the failure rate exceeds the configured threshold. +-- After processing a batch, this script: +-- 1. Manages the sliding window (rolling forward as necessary) +-- 2. Records new attempts and failures (unless fully open) +-- 3. Interpolates attempt/failure rates using the sliding window +-- 4. Checks whether to close the circuit (half-open + successes) +-- 5. 
Checks whether to open the circuit (closed + threshold exceeded) -- --- On success: --- Window counters are left intact. The openedUntil timestamp is preserved --- while the decay period is still active so that admit.lua can continue --- computing the linear ramp-up rate. Once the decay period elapses it --- is zeroed, returning the circuit to a fully clean closed state. +-- Returns: { circuitState, stateChanged } -- --- On failure: --- The failure and attempt counters are incremented. A two-window sliding --- blend is computed before evaluating the trip condition: --- slidingAttempts = cbAttempts + cbPrevAttempts * prevWeight --- slidingFailures = cbFailures + cbPrevFailures * prevWeight --- where prevWeight decays linearly from 1.0 → 0.0 as the current window ages, --- so previous-window failures fade out gradually rather than dropping off a cliff. --- The circuit opens when: --- • the endpoint is not already open (prevents double-tripping and --- resetting the cooldown timer prematurely), AND --- • slidingAttempts >= cbMinAttempts (avoids tripping on statistically --- insignificant data at cold start or just after a window roll), AND --- • slidingFailures / slidingAttempts exceeds cbErrorThreshold. --- On open, all counters (current and previous) are reset to zero so the --- fresh cooldown window begins with a clean slate ready for recovery. 
+-- circuitState: the current state of the circuit after this run +-- "open" — fully open (during cooldown, no probes) +-- "half_open" — open but past cooldown (probing) +-- "closed_recovery" — closed but ramping up (recovery period) +-- "closed" — closed, running at full rate -- --- Returns: { ok (0|1), state } --- state: "closed" | "opened" | "failed" +-- stateChanged: whether a circuit transition occurred this run +-- 1 — the circuit opened or closed during this execution +-- 0 — no state transition + +-- Circuit state constants +local OPEN = "open" +local HALF_OPEN = "half_open" +local CLOSED_RECOVERY = "closed_recovery" +local CLOSED = "closed" -- Keys -local cbKey = KEYS[1] -- cb:{endpoint} circuit breaker state hash +local epKey = KEYS[1] -- ep:{targetId} combined endpoint state hash -- Arguments -local now = tonumber(ARGV[1]) or 0 -- current wall-clock time (ms) -local success = tonumber(ARGV[2]) or 0 -- 1 = success, 0 = failure -local cooldownMs = tonumber(ARGV[3]) or 0 -- how long the circuit stays open (ms) -local decayPeriodMs = tonumber(ARGV[4]) or 0 -- ramp-up window after circuit closes (ms) -local cbErrorThreshold = tonumber(ARGV[5]) or 0 -- error-rate fraction that trips the circuit (e.g. 0.5) -local cbMinAttempts = tonumber(ARGV[6]) or 0 -- minimum samples before the circuit can trip -local cbWindowPeriodMs = tonumber(ARGV[7]) or 0 -- error-rate sliding window duration (ms) - --- TTL policy: keep circuit breaker state alive for at least the cooldown --- duration plus a buffer so the decay period remains visible after a close. 
-local cbTtlSeconds = math.ceil(cooldownMs / 1000) + 60 - -local function refreshCbExpiry() - redis.call("EXPIRE", cbKey, cbTtlSeconds) -end +local now = tonumber(ARGV[1]) or 0 +local consumedTokens = tonumber(ARGV[2]) or 0 +local processingFailures = tonumber(ARGV[3]) or 0 +local cooldownPeriodMs = tonumber(ARGV[4]) or 0 +local recoveryPeriodMs = tonumber(ARGV[5]) or 0 +local failureThreshold = tonumber(ARGV[6]) or 0 +local minAttempts = tonumber(ARGV[7]) or 0 +local samplePeriodMs = tonumber(ARGV[8]) or 0 -------------------------------------------------------------------------------- -- LOAD CURRENT STATE -------------------------------------------------------------------------------- -local cb = redis.call("HMGET", cbKey, - "opened_until_ms", "cb_window_from", "cb_failures", "cb_attempts", - "cb_prev_failures", "cb_prev_attempts") -local openedUntil = tonumber(cb[1] or "0") -local cbWindowFrom = tonumber(cb[2] or "0") -local cbFailures = tonumber(cb[3] or "0") -local cbAttempts = tonumber(cb[4] or "0") -local cbPrevFailures = tonumber(cb[5] or "0") -local cbPrevAttempts = tonumber(cb[6] or "0") +local state = redis.call("HMGET", epKey, + "is_open", "switched_at", + "cur_attempts", "prev_attempts", "cur_failures", "prev_failures", + "sample_till") +local isOpenRaw = state[1] +local needInit = isOpenRaw == false or isOpenRaw == nil +local isOpen = needInit or tonumber(isOpenRaw) == 1 +local switchedAt = needInit and 0 or tonumber(state[2] or "0") +local curAttempts = tonumber(state[3] or "0") +local prevAttempts = tonumber(state[4] or "0") +local curFailures = tonumber(state[5] or "0") +local prevFailures = tonumber(state[6] or "0") +local sampleTill = tonumber(state[7] or "0") + +-------------------------------------------------------------------------------- +-- 1. 
DETERMINE CIRCUIT SUB-STATE +-------------------------------------------------------------------------------- --- Every outcome (success or failure) contributes to the error-rate window -cbAttempts = cbAttempts + 1 +local isHalfOpen = isOpen and now > switchedAt + cooldownPeriodMs +local isFullyOpen = isOpen and not isHalfOpen -------------------------------------------------------------------------------- --- SUCCESS — preserve openedUntil during decay, then zero it --- --- admit.lua uses openedUntil to calculate the linear ramp-up rate while the --- decay period is active. That timestamp must survive in Redis until the --- decay period ends. Clearing it prematurely would snap the fill rate back --- to full immediately rather than ramping gradually. --------------------------------------------------------------------------------- - -if success == 1 then - -- Keep openedUntil only if we are still within the decay window - local inDecayWindow = openedUntil > 0 and now > openedUntil and (now - openedUntil) < decayPeriodMs - local preservedOpenedUntil = inDecayWindow and openedUntil or 0 - - redis.call("HSET", cbKey, - "opened_until_ms", preservedOpenedUntil, - "cb_window_from", cbWindowFrom, - "cb_failures", cbFailures, - "cb_attempts", cbAttempts, - "cb_prev_failures", cbPrevFailures, - "cb_prev_attempts", cbPrevAttempts - ) - refreshCbExpiry() - return { 1, "closed" } +-- 2. 
MANAGE SLIDING WINDOW +-------------------------------------------------------------------------------- + +if sampleTill < now then + if sampleTill + samplePeriodMs < now then + -- Complete reset — window is too old + prevAttempts = 0 + prevFailures = 0 + sampleTill = now + samplePeriodMs + else + -- Promote current to previous + prevAttempts = curAttempts + prevFailures = curFailures + sampleTill = sampleTill + samplePeriodMs + end + curAttempts = 0 + curFailures = 0 end -------------------------------------------------------------------------------- --- FAILURE — increment counter and evaluate whether to open the circuit --- --- The trip condition is evaluated against a sliding blend of current and --- previous window counts, not the raw current-window counts alone. This --- prevents a burst of failures from escaping detection simply because it --- straddles a window boundary and gets partially discarded by a reset. --------------------------------------------------------------------------------- - -cbFailures = cbFailures + 1 - --- The circuit is already open when openedUntil is set and has not yet elapsed. --- Guard against double-tripping, which would reset the cooldown timer early. -local circuitAlreadyOpen = openedUntil > 0 and now < openedUntil - --- Blend current and previous window counts. --- prevWeight decays linearly from 1.0 → 0.0 as the current window ages, --- so previous-window failures fade out gradually rather than dropping off a cliff. 
-local windowElapsed = cbWindowFrom > 0 and (now - cbWindowFrom) or 0 -local hasWindow = cbWindowPeriodMs > 0 -local prevWeight = hasWindow and math.max(0, (cbWindowPeriodMs - windowElapsed) / cbWindowPeriodMs) or 0 -local slidingFailures = cbFailures + cbPrevFailures * prevWeight -local slidingAttempts = cbAttempts + cbPrevAttempts * prevWeight - -if not circuitAlreadyOpen - and slidingAttempts >= cbMinAttempts -- enough data to be statistically meaningful - and (slidingFailures / slidingAttempts) > cbErrorThreshold then - -- Trip the circuit — reset all counters so recovery starts from a clean slate - redis.call("HSET", cbKey, - "opened_until_ms", now + cooldownMs, - "cb_window_from", 0, - "cb_failures", 0, - "cb_attempts", 0, - "cb_prev_failures", 0, - "cb_prev_attempts", 0 - ) - refreshCbExpiry() - return { 0, "opened" } +-- 3. RECORD NEW ATTEMPTS/FAILURES (unless fully open) +-------------------------------------------------------------------------------- + +if not isFullyOpen then + curAttempts = curAttempts + consumedTokens + curFailures = curFailures + processingFailures end --- Below the threshold — record the failure but keep the circuit closed -redis.call("HSET", cbKey, - "opened_until_ms", openedUntil, - "cb_window_from", cbWindowFrom, - "cb_failures", cbFailures, - "cb_attempts", cbAttempts, - "cb_prev_failures", cbPrevFailures, - "cb_prev_attempts", cbPrevAttempts +-------------------------------------------------------------------------------- +-- 4. INTERPOLATE VALUES +-------------------------------------------------------------------------------- + +local weight = (sampleTill - now) / samplePeriodMs +local attempts = prevAttempts * weight + curAttempts +local failures = prevFailures * weight + curFailures + +-------------------------------------------------------------------------------- +-- 5. 
CIRCUIT BREAKER LOGIC +-------------------------------------------------------------------------------- + +local processingSuccesses = consumedTokens - processingFailures +local stateChanged = false + +-- Close circuit when half-open and there are successes +if isHalfOpen and processingSuccesses > 0 then + isOpen = false + switchedAt = now + stateChanged = true + -- fall through, allow circuit to immediately re-open +end + +-- Open circuit when closed, enough samples, and threshold exceeded +local hasSampledEnough = attempts >= minAttempts +if not isOpen and hasSampledEnough and (failures / attempts) > failureThreshold then + isOpen = true + switchedAt = now + curAttempts = 0 + curFailures = 0 + prevAttempts = 0 + prevFailures = 0 + sampleTill = now + samplePeriodMs + stateChanged = true +end + +-------------------------------------------------------------------------------- +-- 6. DETERMINE CURRENT CIRCUIT STATE FOR REPORTING +-------------------------------------------------------------------------------- + +local circuitState +if isOpen then + if now > switchedAt + cooldownPeriodMs then + circuitState = HALF_OPEN + else + circuitState = OPEN + end +else + if now < switchedAt + recoveryPeriodMs then + circuitState = CLOSED_RECOVERY + else + circuitState = CLOSED + end +end + +-------------------------------------------------------------------------------- +-- 7. 
PERSIST STATE +-------------------------------------------------------------------------------- + +redis.call("HSET", epKey, + "cur_attempts", curAttempts, + "prev_attempts", prevAttempts, + "cur_failures", curFailures, + "prev_failures", prevFailures, + "sample_till", sampleTill ) -refreshCbExpiry() -return { 0, "failed" } + +if stateChanged then + redis.call("HSET", epKey, + "is_open", isOpen and 1 or 0, + "switched_at", switchedAt + ) +end + +return { circuitState, stateChanged and 1 or 0 } diff --git a/lambdas/perf-runner-lambda/package.json b/lambdas/perf-runner-lambda/package.json index 9f9d01d8..59d7691b 100644 --- a/lambdas/perf-runner-lambda/package.json +++ b/lambdas/perf-runner-lambda/package.json @@ -13,13 +13,17 @@ "typecheck": "tsc --noEmit" }, "dependencies": { + "@aws-crypto/sha256-js": "catalog:aws", "@aws-sdk/client-cloudwatch-logs": "catalog:aws", "@aws-sdk/client-sqs": "catalog:aws", + "@aws-sdk/credential-providers": "catalog:aws", + "@smithy/signature-v4": "catalog:aws", "@nhs-notify-client-callbacks/logger": "workspace:*", "@nhs-notify-client-callbacks/models": "workspace:*", - "esbuild": "catalog:tools" + "@redis/client": "catalog:app" }, "devDependencies": { + "esbuild": "catalog:tools", "@tsconfig/node22": "catalog:tools", "@types/aws-lambda": "catalog:tools", "@types/jest": "catalog:test", diff --git a/lambdas/perf-runner-lambda/src/__tests__/cloudwatch.test.ts b/lambdas/perf-runner-lambda/src/__tests__/cloudwatch.test.ts index 055ac7bc..526de638 100644 --- a/lambdas/perf-runner-lambda/src/__tests__/cloudwatch.test.ts +++ b/lambdas/perf-runner-lambda/src/__tests__/cloudwatch.test.ts @@ -1,5 +1,10 @@ import type { CloudWatchLogsClient } from "@aws-sdk/client-cloudwatch-logs"; -import { queryDeliveryMetricsSnapshot, queryMetricsSnapshot } from "cloudwatch"; +import { + queryCircuitBreakerSnapshot, + queryDeliveryMetricsSnapshot, + queryMetricsSnapshot, + queryPerClientRateTimeline, +} from "cloudwatch"; const mockCloudWatchClient = { send: 
jest.fn(), @@ -285,3 +290,373 @@ describe("queryDeliveryMetricsSnapshot", () => { expect(result).toBeNull(); }); }); + +describe("queryCircuitBreakerSnapshot", () => { + it("returns null when logGroupNames is empty", async () => { + const result = await queryCircuitBreakerSnapshot( + mockCloudWatchClient, + [], + 0, + 60, + ); + + expect(result).toBeNull(); + expect(mockCloudWatchClient.send).not.toHaveBeenCalled(); + }); + + it("returns null when StartQuery returns no queryId", async () => { + mockCloudWatchClient.send.mockResolvedValueOnce({} as never); + + const result = await queryCircuitBreakerSnapshot( + mockCloudWatchClient, + ["/aws/lambda/test-https-client-perf-client-1"], + 0, + 60, + ); + + expect(result).toBeNull(); + }); + + it("returns a snapshot with zeroed metrics when the result row is empty", async () => { + mockCloudWatchClient.send + .mockResolvedValueOnce({ queryId: "qid-cb1" } as never) + .mockResolvedValueOnce({ status: "Complete", results: [] } as never); + + const promise = queryCircuitBreakerSnapshot( + mockCloudWatchClient, + ["/aws/lambda/test-https-client-perf-client-1"], + 100, + 160, + ); + + await jest.runAllTimersAsync(); + const result = await promise; + + expect(result).toMatchObject({ + intervalStartSec: 100, + intervalEndSec: 160, + circuitOpenEvents: 0, + circuitCloseEvents: 0, + admissionDeniedCircuitOpen: 0, + admissionDeniedRateLimited: 0, + deliveryAttempts: 0, + deliverySuccesses: 0, + deliveryFailures: 0, + deliveryRateLimited: 0, + }); + expect(result?.snapshotAt).toBeGreaterThan(0); + }); + + it("returns a populated snapshot when query completes successfully", async () => { + mockCloudWatchClient.send + .mockResolvedValueOnce({ queryId: "qid-cb2" } as never) + .mockResolvedValueOnce({ + status: "Complete", + results: [ + [ + { field: "circuitOpenEvents", value: "3" }, + { field: "circuitCloseEvents", value: "2" }, + { field: "admissionDeniedCircuitOpen", value: "15" }, + { field: "admissionDeniedRateLimited", value: "8" 
}, + { field: "deliveryAttempts", value: "200" }, + { field: "deliverySuccesses", value: "180" }, + { field: "deliveryFailures", value: "12" }, + { field: "deliveryRateLimited", value: "8" }, + ], + ], + } as never); + + const promise = queryCircuitBreakerSnapshot( + mockCloudWatchClient, + ["/aws/lambda/test-https-client-perf-client-1"], + 100, + 160, + ); + + await jest.runAllTimersAsync(); + const result = await promise; + + expect(result).toMatchObject({ + intervalStartSec: 100, + intervalEndSec: 160, + circuitOpenEvents: 3, + circuitCloseEvents: 2, + admissionDeniedCircuitOpen: 15, + admissionDeniedRateLimited: 8, + deliveryAttempts: 200, + deliverySuccesses: 180, + deliveryFailures: 12, + deliveryRateLimited: 8, + }); + }); + + it("sends logGroupNames to StartQuery", async () => { + mockCloudWatchClient.send + .mockResolvedValueOnce({ queryId: "qid-cb3" } as never) + .mockResolvedValueOnce({ status: "Complete", results: [] } as never); + + const logGroups = [ + "/aws/lambda/test-https-client-perf-client-1", + "/aws/lambda/test-https-client-perf-client-2", + ]; + + const promise = queryCircuitBreakerSnapshot( + mockCloudWatchClient, + logGroups, + 0, + 60, + ); + + await jest.runAllTimersAsync(); + await promise; + + const startCmd = mockCloudWatchClient.send.mock.calls[0][0] as { + input: { logGroupNames: string[] }; + }; + expect(startCmd.input.logGroupNames).toEqual(logGroups); + }); + + it("returns null when the query status is Failed", async () => { + mockCloudWatchClient.send + .mockResolvedValueOnce({ queryId: "qid-cb4" } as never) + .mockResolvedValueOnce({ status: "Failed" } as never); + + const promise = queryCircuitBreakerSnapshot( + mockCloudWatchClient, + ["/aws/lambda/test-https-client-perf-client-1"], + 0, + 60, + ); + + await jest.runAllTimersAsync(); + const result = await promise; + + expect(result).toBeNull(); + }); +}); + +describe("queryPerClientRateTimeline", () => { + it("returns empty array when StartQuery returns no queryId", async () 
=> { + mockCloudWatchClient.send.mockResolvedValueOnce({} as never); + + const result = await queryPerClientRateTimeline( + mockCloudWatchClient, + "/aws/lambda/test-https-client-perf-client-1", + 0, + 60, + ); + + expect(result).toEqual([]); + }); + + it("returns empty array when the query status is Failed", async () => { + mockCloudWatchClient.send + .mockResolvedValueOnce({ queryId: "qid-pcr1" } as never) + .mockResolvedValueOnce({ status: "Failed" } as never); + + const promise = queryPerClientRateTimeline( + mockCloudWatchClient, + "/aws/lambda/test-https-client-perf-client-1", + 0, + 60, + ); + + await jest.runAllTimersAsync(); + const result = await promise; + + expect(result).toEqual([]); + }); + + it("returns empty array when results are empty", async () => { + mockCloudWatchClient.send + .mockResolvedValueOnce({ queryId: "qid-pcr2" } as never) + .mockResolvedValueOnce({ status: "Complete", results: [] } as never); + + const promise = queryPerClientRateTimeline( + mockCloudWatchClient, + "/aws/lambda/test-https-client-perf-client-1", + 0, + 60, + ); + + await jest.runAllTimersAsync(); + const result = await promise; + + expect(result).toEqual([]); + }); + + it("returns empty array when results is undefined", async () => { + mockCloudWatchClient.send + .mockResolvedValueOnce({ queryId: "qid-pcr2b" } as never) + .mockResolvedValueOnce({ status: "Complete" } as never); + + const promise = queryPerClientRateTimeline( + mockCloudWatchClient, + "/aws/lambda/test-https-client-perf-client-1", + 0, + 60, + ); + + await jest.runAllTimersAsync(); + const result = await promise; + + expect(result).toEqual([]); + }); + + it("defaults missing fields to zero", async () => { + mockCloudWatchClient.send + .mockResolvedValueOnce({ queryId: "qid-pcr2c" } as never) + .mockResolvedValueOnce({ + status: "Complete", + results: [[{ field: "unknownField", value: "123" }]], + } as never); + + const promise = queryPerClientRateTimeline( + mockCloudWatchClient, + 
"/aws/lambda/test-https-client-perf-client-1", + 0, + 60, + ); + + await jest.runAllTimersAsync(); + const result = await promise; + + expect(result).toHaveLength(1); + expect(result[0].deliveryAttempts).toBe(0); + expect(result[0].timestampSec).toBe( + Math.floor(new Date("0").getTime() / 1000), + ); + }); + + it("returns entries sorted by time bin when query completes", async () => { + mockCloudWatchClient.send + .mockResolvedValueOnce({ queryId: "qid-pcr3" } as never) + .mockResolvedValueOnce({ + status: "Complete", + results: [ + [ + { field: "timeBin", value: "2026-04-09 10:00:00.000" }, + { field: "deliveryAttempts", value: "42" }, + ], + [ + { field: "timeBin", value: "2026-04-09 10:00:10.000" }, + { field: "deliveryAttempts", value: "38" }, + ], + ], + } as never); + + const promise = queryPerClientRateTimeline( + mockCloudWatchClient, + "/aws/lambda/test-https-client-perf-client-1", + 0, + 60, + ); + + await jest.runAllTimersAsync(); + const result = await promise; + + expect(result).toHaveLength(2); + expect(result[0]).toEqual({ + timestampSec: Math.floor( + new Date("2026-04-09 10:00:00.000").getTime() / 1000, + ), + deliveryAttempts: 42, + }); + expect(result[1]).toEqual({ + timestampSec: Math.floor( + new Date("2026-04-09 10:00:10.000").getTime() / 1000, + ), + deliveryAttempts: 38, + }); + }); + + it("sends logGroupName to StartQuery", async () => { + mockCloudWatchClient.send + .mockResolvedValueOnce({ queryId: "qid-pcr4" } as never) + .mockResolvedValueOnce({ status: "Complete", results: [] } as never); + + const promise = queryPerClientRateTimeline( + mockCloudWatchClient, + "/aws/lambda/test-https-client-perf-client-1", + 100, + 200, + ); + + await jest.runAllTimersAsync(); + await promise; + + const startCmd = mockCloudWatchClient.send.mock.calls[0][0] as { + input: { logGroupName: string; startTime: number; endTime: number }; + }; + expect(startCmd.input.logGroupName).toBe( + "/aws/lambda/test-https-client-perf-client-1", + ); + 
expect(startCmd.input.startTime).toBe(100); + expect(startCmd.input.endTime).toBe(200); + }); + + it("polls until the query becomes Complete", async () => { + mockCloudWatchClient.send + .mockResolvedValueOnce({ queryId: "qid-pcr5" } as never) + .mockResolvedValueOnce({ status: "Running" } as never) + .mockResolvedValueOnce({ + status: "Complete", + results: [ + [ + { field: "timeBin", value: "2026-04-09 10:00:00.000" }, + { field: "deliveryAttempts", value: "5" }, + ], + ], + } as never); + + const promise = queryPerClientRateTimeline( + mockCloudWatchClient, + "/aws/lambda/test-https-client-perf-client-1", + 0, + 60, + ); + + await jest.runAllTimersAsync(); + const result = await promise; + + expect(result).toHaveLength(1); + expect(result[0].deliveryAttempts).toBe(5); + expect(mockCloudWatchClient.send).toHaveBeenCalledTimes(3); + }); + + it("returns empty array when the query does not complete within the timeout", async () => { + mockCloudWatchClient.send + .mockResolvedValueOnce({ queryId: "qid-pcr6" } as never) + .mockResolvedValue({ status: "Running" } as never); + + const promise = queryPerClientRateTimeline( + mockCloudWatchClient, + "/aws/lambda/test-https-client-perf-client-1", + 0, + 60, + ); + + await jest.advanceTimersByTimeAsync(60_000); + const result = await promise; + + expect(result).toEqual([]); + }); + + it("returns empty array when the query status is Cancelled", async () => { + mockCloudWatchClient.send + .mockResolvedValueOnce({ queryId: "qid-pcr7" } as never) + .mockResolvedValueOnce({ status: "Cancelled" } as never); + + const promise = queryPerClientRateTimeline( + mockCloudWatchClient, + "/aws/lambda/test-https-client-perf-client-1", + 0, + 60, + ); + + await jest.runAllTimersAsync(); + const result = await promise; + + expect(result).toEqual([]); + }); +}); diff --git a/lambdas/perf-runner-lambda/src/__tests__/elasticache.test.ts b/lambdas/perf-runner-lambda/src/__tests__/elasticache.test.ts new file mode 100644 index 00000000..54c8e813 
--- /dev/null +++ b/lambdas/perf-runner-lambda/src/__tests__/elasticache.test.ts @@ -0,0 +1,165 @@ +import { dumpRateLimitState, flushElastiCache } from "elasticache"; +import type { ElastiCacheDeps } from "types"; + +const mockConnect = jest.fn().mockResolvedValue(undefined); +const mockFlushAll = jest.fn().mockResolvedValue("OK"); +const mockDisconnect = jest.fn().mockResolvedValue(undefined); +const mockHmGet = jest.fn().mockResolvedValue([]); +let mockIsOpen = true; +let mockScanKeys: string[] = []; + +jest.mock("@redis/client", () => ({ + createClient: jest.fn(() => ({ + connect: mockConnect, + flushAll: mockFlushAll, + disconnect: mockDisconnect, + hmGet: mockHmGet, + get isOpen() { + return mockIsOpen; + }, + scanIterator: jest.fn(function scanIterator() { + return mockScanKeys[Symbol.iterator](); + }), + })), +})); + +jest.mock("@smithy/signature-v4", () => ({ + SignatureV4: jest.fn(() => ({ + presign: jest.fn().mockResolvedValue({ + query: { + "X-Amz-Algorithm": "AWS4-HMAC-SHA256", + "X-Amz-Credential": "test-credential", + }, + }), + })), +})); + +jest.mock("@aws-crypto/sha256-js", () => ({ + Sha256: jest.fn(), +})); + +jest.mock("@aws-sdk/credential-providers", () => ({ + fromNodeProviderChain: jest.fn(() => ({})), +})); + +const deps: ElastiCacheDeps = { + endpoint: "test-cache.example.invalid", + cacheName: "test-cache", + iamUsername: "test-user", + region: "eu-west-2", +}; + +beforeEach(() => { + jest.clearAllMocks(); + mockIsOpen = true; + mockScanKeys = []; +}); + +describe("flushElastiCache", () => { + it("connects, flushes all keys, and disconnects", async () => { + await flushElastiCache(deps); + + expect(mockConnect).toHaveBeenCalledTimes(1); + expect(mockFlushAll).toHaveBeenCalledTimes(1); + expect(mockDisconnect).toHaveBeenCalledTimes(1); + }); + + it("disconnects even when flushAll throws", async () => { + mockFlushAll.mockRejectedValueOnce(new Error("flush failed")); + + await expect(flushElastiCache(deps)).rejects.toThrow("flush failed"); 
+ expect(mockDisconnect).toHaveBeenCalledTimes(1); + }); + + it("skips disconnect when client is not open", async () => { + mockIsOpen = false; + + await flushElastiCache(deps); + + expect(mockDisconnect).not.toHaveBeenCalled(); + }); +}); + +describe("dumpRateLimitState", () => { + it("returns empty array when no ep: keys exist", async () => { + mockScanKeys = []; + + const result = await dumpRateLimitState(deps); + + expect(result).toEqual([]); + expect(mockConnect).toHaveBeenCalledTimes(1); + expect(mockDisconnect).toHaveBeenCalledTimes(1); + }); + + it("returns state for each ep: key sorted alphabetically", async () => { + mockScanKeys = ["ep:{target-b}", "ep:{target-a}"]; + mockHmGet + .mockResolvedValueOnce([ + "1", + "500", + "0", + "400", + "20", + "15", + "5", + "3", + "1500", + ]) + .mockResolvedValueOnce([ + "0", + "1000", + "5", + "900", + "10", + "8", + "2", + "1", + "2000", + ]); + + const result = await dumpRateLimitState(deps); + + expect(result).toHaveLength(2); + expect(result[0].key).toBe("ep:{target-a}"); + expect(result[0].isOpen).toBe("1"); + expect(result[0].switchedAt).toBe("500"); + expect(result[0].bucketTokens).toBe("0"); + expect(result[0].bucketRefilledAt).toBe("400"); + expect(result[0].curAttempts).toBe("20"); + expect(result[0].prevAttempts).toBe("15"); + expect(result[0].curFailures).toBe("5"); + expect(result[0].prevFailures).toBe("3"); + expect(result[0].sampleTill).toBe("1500"); + + expect(result[1].key).toBe("ep:{target-b}"); + expect(result[1].isOpen).toBe("0"); + }); + + it("disconnects even when scan throws", async () => { + const mockClient = { + connect: mockConnect, + disconnect: mockDisconnect, + hmGet: mockHmGet, + get isOpen() { + return mockIsOpen; + }, + scanIterator: jest.fn(() => { + throw new Error("scan failed"); + }), + }; + const { createClient } = jest.requireMock("@redis/client"); + createClient.mockReturnValueOnce(mockClient); + + await expect(dumpRateLimitState(deps)).rejects.toThrow("scan failed"); + 
expect(mockDisconnect).toHaveBeenCalledTimes(1); + }); + + it("skips disconnect when client is not open", async () => { + mockIsOpen = false; + mockScanKeys = []; + + await dumpRateLimitState(deps); + + expect(mockDisconnect).not.toHaveBeenCalled(); + }); +}); diff --git a/lambdas/perf-runner-lambda/src/__tests__/index.test.ts b/lambdas/perf-runner-lambda/src/__tests__/index.test.ts index 1d1a501a..3c33bfd6 100644 --- a/lambdas/perf-runner-lambda/src/__tests__/index.test.ts +++ b/lambdas/perf-runner-lambda/src/__tests__/index.test.ts @@ -32,6 +32,7 @@ const mockResult: PerformanceResult = { phases: [], metrics: [], deliveryMetrics: [], + circuitBreakerMetrics: [], }; beforeEach(() => { @@ -42,6 +43,11 @@ beforeEach(() => { "/aws/lambda/nhs-dev-callbacks-client-transform-filter"; process.env.DELIVERY_LOG_GROUP_PREFIX = "/aws/lambda/nhs-dev-callbacks-https-client-"; + process.env.MOCK_WEBHOOK_LOG_GROUP = + "/aws/lambda/nhs-dev-callbacks-mock-webhook"; + process.env.ELASTICACHE_ENDPOINT = "cache.example.invalid"; + process.env.ELASTICACHE_CACHE_NAME = "test-cache"; + process.env.ELASTICACHE_IAM_USERNAME = "test-user"; process.env.AWS_REGION = "eu-west-2"; }); @@ -55,9 +61,17 @@ describe("handler", () => { queueUrl: "https://sqs.example.invalid/queue", logGroupName: "/aws/lambda/nhs-dev-callbacks-client-transform-filter", deliveryLogGroupPrefix: "/aws/lambda/nhs-dev-callbacks-https-client-", + mockWebhookLogGroup: "/aws/lambda/nhs-dev-callbacks-mock-webhook", }), DEFAULT_SCENARIO, "test-id", + undefined, + expect.objectContaining({ + endpoint: "cache.example.invalid", + cacheName: "test-cache", + iamUsername: "test-user", + region: "eu-west-2", + }), ); }); @@ -73,6 +87,8 @@ describe("handler", () => { expect.anything(), customScenario, "custom-test", + undefined, + expect.anything(), ); }); @@ -117,6 +133,38 @@ describe("handler", () => { }), DEFAULT_SCENARIO, "no-prefix-test", + undefined, + expect.anything(), + ); + }); + + it("passes undefined elastiCacheDeps when 
ElastiCache env vars are missing", async () => { + delete process.env.ELASTICACHE_ENDPOINT; + delete process.env.ELASTICACHE_CACHE_NAME; + delete process.env.ELASTICACHE_IAM_USERNAME; + + await handler({ testId: "no-cache-test" }); + + expect(mockRunPerformanceTest).toHaveBeenCalledWith( + expect.anything(), + DEFAULT_SCENARIO, + "no-cache-test", + undefined, + undefined, + ); + }); + + it("passes mockWebhookLogGroup from env var", async () => { + await handler({ testId: "webhook-test" }); + + expect(mockRunPerformanceTest).toHaveBeenCalledWith( + expect.objectContaining({ + mockWebhookLogGroup: "/aws/lambda/nhs-dev-callbacks-mock-webhook", + }), + expect.anything(), + "webhook-test", + undefined, + expect.anything(), ); }); }); diff --git a/lambdas/perf-runner-lambda/src/__tests__/purge.test.ts b/lambdas/perf-runner-lambda/src/__tests__/purge.test.ts new file mode 100644 index 00000000..14bcf247 --- /dev/null +++ b/lambdas/perf-runner-lambda/src/__tests__/purge.test.ts @@ -0,0 +1,116 @@ +import type { SQSClient } from "@aws-sdk/client-sqs"; +import { deriveQueueUrls, purgeQueues } from "purge"; +import type { Scenario } from "types"; + +const scenario: Scenario = { + phases: [{ durationSecs: 1, targetEps: 10 }], + eventMix: [ + { + weight: 1, + factory: "messageStatus", + clientId: "perf-client-1", + messageStatus: "DELIVERED", + }, + { + weight: 1, + factory: "channelStatus", + clientId: "perf-client-2", + channelStatus: "DELIVERED", + }, + ], + metricsIntervalSecs: 5, +}; + +const inboundQueueUrl = + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-inbound-event-queue"; + +describe("deriveQueueUrls", () => { + it("derives all queue URLs from the inbound queue URL and scenario", () => { + const urls = deriveQueueUrls(inboundQueueUrl, scenario); + + expect(urls).toEqual([ + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-inbound-event-queue", + 
"https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-inbound-event-dlq-queue", + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-perf-client-1-delivery-queue", + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-perf-client-1-delivery-dlq-queue", + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-perf-client-2-delivery-queue", + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-perf-client-2-delivery-dlq-queue", + ]); + }); + + it("deduplicates client IDs that appear multiple times in eventMix", () => { + const duplicateScenario: Scenario = { + ...scenario, + eventMix: [ + { + weight: 1, + factory: "messageStatus", + clientId: "perf-client-1", + messageStatus: "DELIVERED", + }, + { + weight: 1, + factory: "channelStatus", + clientId: "perf-client-1", + channelStatus: "DELIVERED", + }, + ], + }; + + const urls = deriveQueueUrls(inboundQueueUrl, duplicateScenario); + + expect(urls).toEqual([ + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-inbound-event-queue", + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-inbound-event-dlq-queue", + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-perf-client-1-delivery-queue", + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-perf-client-1-delivery-dlq-queue", + ]); + }); +}); + +describe("purgeQueues", () => { + const mockSend = jest.fn().mockResolvedValue({}); + const mockSqsClient = { send: mockSend } as unknown as SQSClient; + + beforeEach(() => { + jest.clearAllMocks(); + mockSend.mockResolvedValue({}); + }); + + it("sends a PurgeQueueCommand for each queue URL", async () => { + const urls = [ + "https://sqs.example.invalid/queue-a", + "https://sqs.example.invalid/queue-b", + ]; + + await purgeQueues(mockSqsClient, urls); + + expect(mockSend).toHaveBeenCalledTimes(2); + }); + + it("ignores QueueDoesNotExist errors gracefully", async () => { + const nonExistentError = 
Object.assign(new Error("Queue does not exist"), { + name: "QueueDoesNotExist", + }); + mockSend.mockRejectedValueOnce(nonExistentError); + + await expect( + purgeQueues(mockSqsClient, ["https://sqs.example.invalid/missing"]), + ).resolves.toBeUndefined(); + }); + + it("rethrows non-QueueDoesNotExist errors", async () => { + const otherError = new Error("Access denied"); + mockSend.mockRejectedValueOnce(otherError); + + await expect( + purgeQueues(mockSqsClient, ["https://sqs.example.invalid/queue"]), + ).rejects.toThrow("Access denied"); + }); + + it("handles an empty queue URL list without sending any commands", async () => { + await purgeQueues(mockSqsClient, []); + + expect(mockSend).not.toHaveBeenCalled(); + }); +}); diff --git a/lambdas/perf-runner-lambda/src/__tests__/runner.test.ts b/lambdas/perf-runner-lambda/src/__tests__/runner.test.ts index 1cf5f3a3..622e98a4 100644 --- a/lambdas/perf-runner-lambda/src/__tests__/runner.test.ts +++ b/lambdas/perf-runner-lambda/src/__tests__/runner.test.ts @@ -1,6 +1,7 @@ import type { SQSClient } from "@aws-sdk/client-sqs"; import type { CloudWatchLogsClient } from "@aws-sdk/client-cloudwatch-logs"; import type { + CircuitBreakerSnapshot, DeliveryMetricsSnapshot, MetricsSnapshot, PhaseResult, @@ -10,16 +11,36 @@ import type { import { defaultSleep, runPerformanceTest } from "runner"; import { generatePhaseLoad } from "sqs"; -import { queryDeliveryMetricsSnapshot, queryMetricsSnapshot } from "cloudwatch"; +import { deriveQueueUrls, purgeQueues } from "purge"; +import { dumpRateLimitState, flushElastiCache } from "elasticache"; +import { verifyMockWebhook } from "webhook-verify"; +import { + queryCircuitBreakerSnapshot, + queryDeliveryMetricsSnapshot, + queryMetricsSnapshot, + queryPerClientRateTimeline, +} from "cloudwatch"; jest.mock("sqs"); jest.mock("cloudwatch"); +jest.mock("purge"); +jest.mock("elasticache"); +jest.mock("webhook-verify"); const mockGeneratePhaseLoad = jest.mocked(generatePhaseLoad); const 
mockQueryMetricsSnapshot = jest.mocked(queryMetricsSnapshot); const mockQueryDeliveryMetricsSnapshot = jest.mocked( queryDeliveryMetricsSnapshot, ); +const mockQueryCircuitBreakerSnapshot = jest.mocked( + queryCircuitBreakerSnapshot, +); +const mockQueryPerClientRateTimeline = jest.mocked(queryPerClientRateTimeline); +const mockDeriveQueueUrls = jest.mocked(deriveQueueUrls); +const mockPurgeQueues = jest.mocked(purgeQueues); +const mockFlushElastiCache = jest.mocked(flushElastiCache); +const mockDumpRateLimitState = jest.mocked(dumpRateLimitState); +const mockVerifyMockWebhook = jest.mocked(verifyMockWebhook); const immediateSleep = jest.fn().mockResolvedValue(undefined); @@ -46,6 +67,20 @@ const mockDeliverySnapshot: DeliveryMetricsSnapshot = { p99Ms: 500, }; +const mockCbSnapshot: CircuitBreakerSnapshot = { + snapshotAt: Date.now(), + intervalStartSec: 0, + intervalEndSec: 60, + circuitOpenEvents: 1, + circuitCloseEvents: 0, + admissionDeniedCircuitOpen: 5, + admissionDeniedRateLimited: 3, + deliveryAttempts: 100, + deliverySuccesses: 92, + deliveryFailures: 5, + deliveryRateLimited: 3, +}; + const scenario: Scenario = { phases: [{ durationSecs: 1, targetEps: 1000 }], eventMix: [ @@ -71,6 +106,18 @@ beforeEach(() => { jest.clearAllMocks(); mockGeneratePhaseLoad.mockResolvedValue(mockPhaseResult); mockQueryDeliveryMetricsSnapshot.mockResolvedValue(null); + mockQueryCircuitBreakerSnapshot.mockResolvedValue(null); + mockQueryPerClientRateTimeline.mockResolvedValue([]); + mockDeriveQueueUrls.mockReturnValue([ + "https://sqs.example.invalid/inbound-event-queue", + ]); + mockPurgeQueues.mockResolvedValue(undefined); + mockFlushElastiCache.mockResolvedValue(undefined); + mockDumpRateLimitState.mockResolvedValue([]); + mockVerifyMockWebhook.mockResolvedValue({ + receivedCallbacks: 0, + verified: false, + }); immediateSleep.mockResolvedValue(undefined); }); @@ -78,6 +125,7 @@ describe("runPerformanceTest", () => { it("returns a PerformanceResult with phase results and 
snapshots from polling and final query", async () => { mockQueryMetricsSnapshot.mockResolvedValue(mockSnapshot); mockQueryDeliveryMetricsSnapshot.mockResolvedValue(mockDeliverySnapshot); + mockQueryCircuitBreakerSnapshot.mockResolvedValue(mockCbSnapshot); const result = await runPerformanceTest( deps, @@ -92,6 +140,7 @@ describe("runPerformanceTest", () => { expect(result.phases[0]).toEqual(mockPhaseResult); expect(result.metrics).toHaveLength(2); // one mid-test, one final expect(result.deliveryMetrics).toHaveLength(2); // one mid-test, one final + expect(result.circuitBreakerMetrics).toHaveLength(2); // one mid-test, one final expect(result.startedAt).toBeTruthy(); expect(result.completedAt).toBeTruthy(); }); @@ -111,6 +160,7 @@ describe("runPerformanceTest", () => { expect(result.metrics).toHaveLength(1); expect(result.metrics[0]).toEqual(mockSnapshot); expect(result.deliveryMetrics).toHaveLength(0); + expect(result.circuitBreakerMetrics).toHaveLength(0); }); it("produces an empty metrics array when all queries return null", async () => { @@ -125,6 +175,7 @@ describe("runPerformanceTest", () => { expect(result.metrics).toHaveLength(0); expect(result.deliveryMetrics).toHaveLength(0); + expect(result.circuitBreakerMetrics).toHaveLength(0); }); it("runs all phases and collects each result", async () => { @@ -267,7 +318,9 @@ describe("runPerformanceTest", () => { ); expect(mockQueryDeliveryMetricsSnapshot).not.toHaveBeenCalled(); + expect(mockQueryCircuitBreakerSnapshot).not.toHaveBeenCalled(); expect(result.deliveryMetrics).toHaveLength(0); + expect(result.circuitBreakerMetrics).toHaveLength(0); }); it("builds delivery log group names from prefix and event mix client IDs", async () => { @@ -309,6 +362,326 @@ describe("runPerformanceTest", () => { expect.any(Number), ); }); + + it("collects circuit breaker metrics when deliveryLogGroupPrefix is set", async () => { + mockQueryMetricsSnapshot.mockResolvedValue(mockSnapshot); + 
mockQueryDeliveryMetricsSnapshot.mockResolvedValue(mockDeliverySnapshot); + mockQueryCircuitBreakerSnapshot.mockResolvedValue(mockCbSnapshot); + + const result = await runPerformanceTest( + deps, + scenario, + "test-cb-1", + immediateSleep, + ); + + expect(result.circuitBreakerMetrics.length).toBeGreaterThanOrEqual(1); + expect(mockQueryCircuitBreakerSnapshot).toHaveBeenCalled(); + }); + + it("returns empty circuitBreakerMetrics when CB queries return null", async () => { + mockQueryMetricsSnapshot.mockResolvedValue(mockSnapshot); + mockQueryDeliveryMetricsSnapshot.mockResolvedValue(mockDeliverySnapshot); + mockQueryCircuitBreakerSnapshot.mockResolvedValue(null); + + const result = await runPerformanceTest( + deps, + scenario, + "test-cb-null", + immediateSleep, + ); + + expect(result.circuitBreakerMetrics).toHaveLength(0); + }); + + it("uses per-interval windowing for circuit breaker snapshots", async () => { + mockQueryMetricsSnapshot.mockResolvedValue(mockSnapshot); + mockQueryDeliveryMetricsSnapshot.mockResolvedValue(mockDeliverySnapshot); + mockQueryCircuitBreakerSnapshot.mockResolvedValue(mockCbSnapshot); + + let resolvePhase!: (value: PhaseResult) => void; + mockGeneratePhaseLoad.mockImplementation( + () => + new Promise((r) => { + resolvePhase = r; + }), + ); + + let sleepCount = 0; + const controlledSleep = jest.fn(async () => { + sleepCount += 1; + if (sleepCount >= 3) { + resolvePhase(mockPhaseResult); + } + }); + + await runPerformanceTest( + deps, + scenario, + "test-cb-interval", + controlledSleep, + ); + + const cbCalls = mockQueryCircuitBreakerSnapshot.mock.calls; + expect(cbCalls.length).toBeGreaterThanOrEqual(2); + const firstCallEndSec = cbCalls[0][3]; + const secondCallStartSec = cbCalls[1][2]; + expect(secondCallStartSec).toBe(firstCallEndSec); + }); + + it("collects per-client rate timelines when deliveryLogGroupPrefix is set", async () => { + mockQueryMetricsSnapshot.mockResolvedValue(mockSnapshot); + 
mockQueryDeliveryMetricsSnapshot.mockResolvedValue(mockDeliverySnapshot); + mockQueryPerClientRateTimeline.mockResolvedValue([ + { timestampSec: 1000, deliveryAttempts: 10 }, + ]); + + const result = await runPerformanceTest( + deps, + scenario, + "test-pcr-1", + immediateSleep, + ); + + expect(result.perClientRateTimelines).toHaveLength(1); + expect(result.perClientRateTimelines![0].clientId).toBe("perf-client-1"); + expect(result.perClientRateTimelines![0].entries).toHaveLength(1); + }); + + it("queries each client log group individually for rate timelines", async () => { + mockQueryMetricsSnapshot.mockResolvedValue(null); + mockQueryPerClientRateTimeline.mockResolvedValue([ + { timestampSec: 1000, deliveryAttempts: 5 }, + ]); + + const multiClientScenario: Scenario = { + ...scenario, + eventMix: [ + { + weight: 1, + factory: "messageStatus", + clientId: "perf-client-1", + messageStatus: "DELIVERED", + }, + { + weight: 1, + factory: "channelStatus", + clientId: "perf-client-2", + channelStatus: "DELIVERED", + }, + ], + }; + + const result = await runPerformanceTest( + deps, + multiClientScenario, + "test-pcr-multi", + immediateSleep, + ); + + expect(mockQueryPerClientRateTimeline).toHaveBeenCalledTimes(2); + expect(mockQueryPerClientRateTimeline).toHaveBeenCalledWith( + deps.cloudWatchClient, + "/aws/lambda/nhs-dev-callbacks-https-client-perf-client-1", + expect.any(Number), + expect.any(Number), + ); + expect(mockQueryPerClientRateTimeline).toHaveBeenCalledWith( + deps.cloudWatchClient, + "/aws/lambda/nhs-dev-callbacks-https-client-perf-client-2", + expect.any(Number), + expect.any(Number), + ); + expect(result.perClientRateTimelines).toHaveLength(2); + }); + + it("excludes clients with empty rate timelines", async () => { + mockQueryMetricsSnapshot.mockResolvedValue(null); + mockQueryPerClientRateTimeline + .mockResolvedValueOnce([{ timestampSec: 1000, deliveryAttempts: 5 }]) + .mockResolvedValueOnce([]); + + const multiClientScenario: Scenario = { + 
...scenario, + eventMix: [ + { + weight: 1, + factory: "messageStatus", + clientId: "perf-client-1", + messageStatus: "DELIVERED", + }, + { + weight: 1, + factory: "channelStatus", + clientId: "perf-client-2", + channelStatus: "DELIVERED", + }, + ], + }; + + const result = await runPerformanceTest( + deps, + multiClientScenario, + "test-pcr-filter", + immediateSleep, + ); + + expect(result.perClientRateTimelines).toHaveLength(1); + expect(result.perClientRateTimelines![0].clientId).toBe("perf-client-1"); + }); + + it("skips per-client rate timelines when deliveryLogGroupPrefix is undefined", async () => { + const depsWithoutPrefix: RunnerDeps = { + ...deps, + deliveryLogGroupPrefix: undefined, + }; + mockQueryMetricsSnapshot.mockResolvedValue(mockSnapshot); + + const result = await runPerformanceTest( + depsWithoutPrefix, + scenario, + "test-pcr-skip", + immediateSleep, + ); + + expect(mockQueryPerClientRateTimeline).not.toHaveBeenCalled(); + expect(result.perClientRateTimelines).toHaveLength(0); + }); + + it("purges queues before and after the test run", async () => { + mockQueryMetricsSnapshot.mockResolvedValue(null); + + await runPerformanceTest(deps, scenario, "test-purge", immediateSleep); + + expect(mockDeriveQueueUrls).toHaveBeenCalledWith(deps.queueUrl, scenario); + expect(mockPurgeQueues).toHaveBeenCalledTimes(2); + }); + + it("flushes ElastiCache before and after when deps are provided", async () => { + mockQueryMetricsSnapshot.mockResolvedValue(null); + const elastiCacheDeps = { + endpoint: "cache.example.invalid", + cacheName: "test-cache", + iamUsername: "test-user", + region: "eu-west-2", + }; + + await runPerformanceTest( + deps, + scenario, + "test-flush", + immediateSleep, + elastiCacheDeps, + ); + + expect(mockFlushElastiCache).toHaveBeenCalledTimes(2); + expect(mockFlushElastiCache).toHaveBeenCalledWith(elastiCacheDeps); + }); + + it("dumps rate limit state before and after when elasticache deps are provided", async () => { + 
mockQueryMetricsSnapshot.mockResolvedValue(null); + const elastiCacheDeps = { + endpoint: "cache.example.invalid", + cacheName: "test-cache", + iamUsername: "test-user", + region: "eu-west-2", + }; + const mockState = [ + { + key: "ep:{target-1}", + isOpen: "0", + switchedAt: "0", + bucketTokens: "10", + bucketRefilledAt: "1000", + curAttempts: "5", + prevAttempts: "3", + curFailures: "0", + prevFailures: "0", + sampleTill: "2000", + }, + ]; + mockDumpRateLimitState.mockResolvedValue(mockState); + + const result = await runPerformanceTest( + deps, + scenario, + "test-dump", + immediateSleep, + elastiCacheDeps, + ); + + expect(mockDumpRateLimitState).toHaveBeenCalledTimes(2); + expect(mockDumpRateLimitState).toHaveBeenCalledWith(elastiCacheDeps); + expect(result.rateLimitStateBefore).toEqual(mockState); + expect(result.rateLimitStateAfter).toEqual(mockState); + }); + + it("omits rate limit state when elasticache deps are not provided", async () => { + mockQueryMetricsSnapshot.mockResolvedValue(null); + + const result = await runPerformanceTest( + deps, + scenario, + "test-no-dump", + immediateSleep, + ); + + expect(mockDumpRateLimitState).not.toHaveBeenCalled(); + expect(result.rateLimitStateBefore).toBeUndefined(); + expect(result.rateLimitStateAfter).toBeUndefined(); + }); + + it("skips ElastiCache flush when deps are not provided", async () => { + mockQueryMetricsSnapshot.mockResolvedValue(null); + + await runPerformanceTest(deps, scenario, "test-no-flush", immediateSleep); + + expect(mockFlushElastiCache).not.toHaveBeenCalled(); + }); + + it("verifies mock webhook when log group is configured", async () => { + const depsWithWebhook: RunnerDeps = { + ...deps, + mockWebhookLogGroup: "/aws/lambda/test-mock-webhook", + }; + mockQueryMetricsSnapshot.mockResolvedValue(null); + mockVerifyMockWebhook.mockResolvedValue({ + receivedCallbacks: 25, + verified: true, + }); + + const result = await runPerformanceTest( + depsWithWebhook, + scenario, + "test-webhook", + 
immediateSleep, + ); + + expect(mockVerifyMockWebhook).toHaveBeenCalledWith( + depsWithWebhook.cloudWatchClient, + "/aws/lambda/test-mock-webhook", + expect.any(Number), + expect.any(Number), + ); + expect(result.webhookVerification).toEqual({ + receivedCallbacks: 25, + verified: true, + }); + }); + + it("omits webhook verification when log group is not configured", async () => { + mockQueryMetricsSnapshot.mockResolvedValue(null); + + const result = await runPerformanceTest( + deps, + scenario, + "test-no-webhook", + immediateSleep, + ); + + expect(mockVerifyMockWebhook).not.toHaveBeenCalled(); + expect(result.webhookVerification).toBeUndefined(); + }); }); describe("defaultSleep", () => { diff --git a/lambdas/perf-runner-lambda/src/__tests__/webhook-verify.test.ts b/lambdas/perf-runner-lambda/src/__tests__/webhook-verify.test.ts new file mode 100644 index 00000000..72c49870 --- /dev/null +++ b/lambdas/perf-runner-lambda/src/__tests__/webhook-verify.test.ts @@ -0,0 +1,173 @@ +import type { CloudWatchLogsClient } from "@aws-sdk/client-cloudwatch-logs"; +import { verifyMockWebhook } from "webhook-verify"; + +const mockSend = jest.fn(); +const mockClient = { send: mockSend } as unknown as CloudWatchLogsClient; + +beforeEach(() => { + jest.clearAllMocks(); +}); + +describe("verifyMockWebhook", () => { + it("returns verified=true when callbacks are found", async () => { + mockSend.mockResolvedValueOnce({ queryId: "q-1" }).mockResolvedValueOnce({ + status: "Complete", + results: [[{ field: "callbackCount", value: "42" }]], + }); + + const result = await verifyMockWebhook( + mockClient, + "/aws/lambda/test-mock-webhook", + 1000, + 2000, + ); + + expect(result).toEqual({ receivedCallbacks: 42, verified: true }); + }); + + it("returns verified=false when no callbacks are found", async () => { + mockSend.mockResolvedValueOnce({ queryId: "q-2" }).mockResolvedValueOnce({ + status: "Complete", + results: [[{ field: "callbackCount", value: "0" }]], + }); + + const result = await 
verifyMockWebhook( + mockClient, + "/aws/lambda/test-mock-webhook", + 1000, + 2000, + ); + + expect(result).toEqual({ receivedCallbacks: 0, verified: false }); + }); + + it("returns verified=false when query fails", async () => { + mockSend + .mockResolvedValueOnce({ queryId: "q-3" }) + .mockResolvedValueOnce({ status: "Failed" }); + + const result = await verifyMockWebhook( + mockClient, + "/aws/lambda/test-mock-webhook", + 1000, + 2000, + ); + + expect(result).toEqual({ receivedCallbacks: 0, verified: false }); + }); + + it("returns verified=false when no queryId is returned", async () => { + mockSend.mockResolvedValueOnce({}); + + const result = await verifyMockWebhook( + mockClient, + "/aws/lambda/test-mock-webhook", + 1000, + 2000, + ); + + expect(result).toEqual({ receivedCallbacks: 0, verified: false }); + }); + + it("returns verified=false when results are empty", async () => { + mockSend.mockResolvedValueOnce({ queryId: "q-4" }).mockResolvedValueOnce({ + status: "Complete", + results: [], + }); + + const result = await verifyMockWebhook( + mockClient, + "/aws/lambda/test-mock-webhook", + 1000, + 2000, + ); + + expect(result).toEqual({ receivedCallbacks: 0, verified: false }); + }); + + it("returns verified=false when results field is undefined", async () => { + mockSend.mockResolvedValueOnce({ queryId: "q-4b" }).mockResolvedValueOnce({ + status: "Complete", + results: undefined, + }); + + const result = await verifyMockWebhook( + mockClient, + "/aws/lambda/test-mock-webhook", + 1000, + 2000, + ); + + expect(result).toEqual({ receivedCallbacks: 0, verified: false }); + }); + + it("polls until the query completes", async () => { + mockSend + .mockResolvedValueOnce({ queryId: "q-5" }) + .mockResolvedValueOnce({ status: "Running" }) + .mockResolvedValueOnce({ + status: "Complete", + results: [[{ field: "callbackCount", value: "10" }]], + }); + + const result = await verifyMockWebhook( + mockClient, + "/aws/lambda/test-mock-webhook", + 1000, + 2000, + ); + + 
expect(result).toEqual({ receivedCallbacks: 10, verified: true }); + expect(mockSend).toHaveBeenCalledTimes(3); + }); + + it("returns verified=false when query is cancelled", async () => { + mockSend + .mockResolvedValueOnce({ queryId: "q-6" }) + .mockResolvedValueOnce({ status: "Cancelled" }); + + const result = await verifyMockWebhook( + mockClient, + "/aws/lambda/test-mock-webhook", + 1000, + 2000, + ); + + expect(result).toEqual({ receivedCallbacks: 0, verified: false }); + }); + + it("returns verified=false when polling times out", async () => { + jest.useFakeTimers(); + + mockSend.mockResolvedValueOnce({ queryId: "q-7" }).mockImplementation( + () => + new Promise((resolve) => { + setTimeout(() => resolve({ status: "Running" }), 1000); + }), + ); + + const originalDateNow = Date.now; + let callCount = 0; + jest.spyOn(Date, "now").mockImplementation(() => { + callCount += 1; + if (callCount <= 1) return originalDateNow.call(Date); + return originalDateNow.call(Date) + 60_000; + }); + + const promise = verifyMockWebhook( + mockClient, + "/aws/lambda/test-mock-webhook", + 1000, + 2000, + ); + + await jest.advanceTimersByTimeAsync(60_000); + + const result = await promise; + + expect(result).toEqual({ receivedCallbacks: 0, verified: false }); + + jest.useRealTimers(); + jest.restoreAllMocks(); + }); +}); diff --git a/lambdas/perf-runner-lambda/src/cloudwatch.ts b/lambdas/perf-runner-lambda/src/cloudwatch.ts index 206bec33..598f5f3f 100644 --- a/lambdas/perf-runner-lambda/src/cloudwatch.ts +++ b/lambdas/perf-runner-lambda/src/cloudwatch.ts @@ -3,19 +3,22 @@ import { GetQueryResultsCommand, StartQueryCommand, } from "@aws-sdk/client-cloudwatch-logs"; -import type { DeliveryMetricsSnapshot, MetricsSnapshot } from "types"; +import type { + CircuitBreakerSnapshot, + DeliveryMetricsSnapshot, + MetricsSnapshot, + PerClientRateEntry, +} from "types"; const INSIGHTS_POLL_INTERVAL_MS = 2000; const INSIGHTS_TIMEOUT_MS = 30_000; type ResultField = { field?: string; value?: 
string }; -async function pollQueryResults( +async function pollInsightsQuery( client: CloudWatchLogsClient, queryId: string, - mapRow: (row: ResultField[]) => T, -): Promise { - const zeroResult = mapRow([]); +): Promise { const deadline = Date.now() + INSIGHTS_TIMEOUT_MS; while (Date.now() < deadline) { @@ -30,15 +33,33 @@ async function pollQueryResults( } if (response.status === "Complete") { - const row = response.results?.[0]; - if (!row) return zeroResult; - return mapRow(row); + return (response.results as ResultField[][]) ?? []; } } return null; } +async function pollQueryResults( + client: CloudWatchLogsClient, + queryId: string, + mapRow: (row: ResultField[]) => T, +): Promise { + const rows = await pollInsightsQuery(client, queryId); + if (rows === null) return null; + return mapRow(rows[0] ?? []); +} + +async function pollAllQueryResults( + client: CloudWatchLogsClient, + queryId: string, + mapRow: (row: ResultField[]) => T, +): Promise { + const rows = await pollInsightsQuery(client, queryId); + if (rows === null) return []; + return rows.map((row) => mapRow(row)); +} + export async function queryMetricsSnapshot( client: CloudWatchLogsClient, logGroupName: string, @@ -108,3 +129,86 @@ export async function queryDeliveryMetricsSnapshot( }; }); } + +export async function queryCircuitBreakerSnapshot( + client: CloudWatchLogsClient, + logGroupNames: string[], + startTimeSec: number, + endTimeSec: number, +): Promise { + if (logGroupNames.length === 0) return null; + + const { queryId } = await client.send( + new StartQueryCommand({ + logGroupNames, + startTime: startTimeSec, + endTime: endTimeSec, + queryString: [ + 'filter msg in ["Circuit breaker opened", "Circuit breaker closed", "Admission denied", "Attempting delivery", "Delivery succeeded", "Transient delivery failure \u2014 requeuing", "Permanent delivery failure \u2014 sending to DLQ", "Rate limited (429)"]', + '| stats sum(msg = "Circuit breaker opened") as circuitOpenEvents,', + ' sum(msg = 
"Circuit breaker closed") as circuitCloseEvents,', + ' sum(msg = "Admission denied" and reason = "circuit_open") as admissionDeniedCircuitOpen,', + ' sum(msg = "Admission denied" and reason = "rate_limited") as admissionDeniedRateLimited,', + ' sum(msg = "Attempting delivery") as deliveryAttempts,', + ' sum(msg = "Delivery succeeded") as deliverySuccesses,', + ' sum(msg in ["Transient delivery failure \u2014 requeuing", "Permanent delivery failure \u2014 sending to DLQ"]) as deliveryFailures,', + ' sum(msg = "Rate limited (429)") as deliveryRateLimited', + ].join("\n"), + }), + ); + + if (!queryId) return null; + + return pollQueryResults(client, queryId, (row) => { + const getField = (name: string): number => + Number(row.find((f) => f.field === name)?.value ?? 0); + + return { + snapshotAt: Date.now(), + intervalStartSec: startTimeSec, + intervalEndSec: endTimeSec, + circuitOpenEvents: getField("circuitOpenEvents"), + circuitCloseEvents: getField("circuitCloseEvents"), + admissionDeniedCircuitOpen: getField("admissionDeniedCircuitOpen"), + admissionDeniedRateLimited: getField("admissionDeniedRateLimited"), + deliveryAttempts: getField("deliveryAttempts"), + deliverySuccesses: getField("deliverySuccesses"), + deliveryFailures: getField("deliveryFailures"), + deliveryRateLimited: getField("deliveryRateLimited"), + }; + }); +} + +const RATE_TIMELINE_BIN_SECONDS = 10; + +export async function queryPerClientRateTimeline( + client: CloudWatchLogsClient, + logGroupName: string, + startTimeSec: number, + endTimeSec: number, +): Promise { + const { queryId } = await client.send( + new StartQueryCommand({ + logGroupName, + startTime: startTimeSec, + endTime: endTimeSec, + queryString: [ + 'filter msg in ["Attempting delivery", "Admission denied"]', + `| stats sum(msg = "Attempting delivery") as deliveryAttempts by bin(@timestamp, ${RATE_TIMELINE_BIN_SECONDS}s) as timeBin`, + "| sort timeBin asc", + ].join("\n"), + }), + ); + + if (!queryId) return []; + + return 
pollAllQueryResults(client, queryId, (row) => { + const timeBinStr = row.find((f) => f.field === "timeBin")?.value ?? "0"; + const timestampSec = Math.floor(new Date(timeBinStr).getTime() / 1000); + const deliveryAttempts = Number( + row.find((f) => f.field === "deliveryAttempts")?.value ?? 0, + ); + + return { timestampSec, deliveryAttempts }; + }); +} diff --git a/lambdas/perf-runner-lambda/src/elasticache.ts b/lambdas/perf-runner-lambda/src/elasticache.ts new file mode 100644 index 00000000..8f41ad9e --- /dev/null +++ b/lambdas/perf-runner-lambda/src/elasticache.ts @@ -0,0 +1,110 @@ +import { type RedisClientType, createClient } from "@redis/client"; +import { SignatureV4 } from "@smithy/signature-v4"; +import { Sha256 } from "@aws-crypto/sha256-js"; +import { fromNodeProviderChain } from "@aws-sdk/credential-providers"; +import type { ElastiCacheDeps, EndpointRateLimitState } from "types"; + +const TOKEN_EXPIRY_SECONDS = 900; + +async function generateIamToken(deps: ElastiCacheDeps): Promise { + const signer = new SignatureV4({ + credentials: fromNodeProviderChain(), + region: deps.region, + service: "elasticache", + sha256: Sha256, + }); + + const signed = await signer.presign( + { + protocol: "https:", + method: "GET", + hostname: deps.cacheName, + path: "/", + query: { Action: "connect", User: deps.iamUsername }, + headers: { host: deps.cacheName }, + }, + { expiresIn: TOKEN_EXPIRY_SECONDS }, + ); + + const qs = new URLSearchParams( + signed.query as Record, + ).toString(); + return `${deps.cacheName}/?${qs}`; +} + +export async function flushElastiCache(deps: ElastiCacheDeps): Promise { + const token = await generateIamToken(deps); + + const client: RedisClientType = createClient({ + url: `rediss://${deps.endpoint}:6379`, + username: deps.iamUsername, + password: token, + }); + + try { + await client.connect(); + await client.flushAll(); + } finally { + if (client.isOpen) { + await client.disconnect(); + } + } +} + +const RATE_LIMIT_HASH_FIELDS = [ + 
"is_open", + "switched_at", + "bucket_tokens", + "bucket_refilled_at", + "cur_attempts", + "prev_attempts", + "cur_failures", + "prev_failures", + "sample_till", +] as const; + +export async function dumpRateLimitState( + deps: ElastiCacheDeps, +): Promise { + const token = await generateIamToken(deps); + + const client: RedisClientType = createClient({ + url: `rediss://${deps.endpoint}:6379`, + username: deps.iamUsername, + password: token, + }); + + try { + await client.connect(); + + const keys: string[] = []; + for await (const key of client.scanIterator({ MATCH: "ep:*" })) { + keys.push(key); + } + + // eslint-disable-next-line sonarjs/null-dereference -- false positive: keys is string[] + keys.sort((a, b) => a.localeCompare(b)); + const states: EndpointRateLimitState[] = []; + for (const key of keys) { + const values = await client.hmGet(key, [...RATE_LIMIT_HASH_FIELDS]); + states.push({ + key, + isOpen: values[0], + switchedAt: values[1], + bucketTokens: values[2], + bucketRefilledAt: values[3], + curAttempts: values[4], + prevAttempts: values[5], + curFailures: values[6], + prevFailures: values[7], + sampleTill: values[8], + }); + } + + return states; + } finally { + if (client.isOpen) { + await client.disconnect(); + } + } +} diff --git a/lambdas/perf-runner-lambda/src/index.ts b/lambdas/perf-runner-lambda/src/index.ts index a0881866..5974627b 100644 --- a/lambdas/perf-runner-lambda/src/index.ts +++ b/lambdas/perf-runner-lambda/src/index.ts @@ -3,7 +3,11 @@ import { SQSClient } from "@aws-sdk/client-sqs"; import { Logger } from "@nhs-notify-client-callbacks/logger"; import { runPerformanceTest } from "runner"; import { DEFAULT_SCENARIO } from "scenario"; -import type { PerfRunnerPayload, PerformanceResult } from "types"; +import type { + ElastiCacheDeps, + PerfRunnerPayload, + PerformanceResult, +} from "types"; const logger = new Logger(); @@ -16,6 +20,10 @@ export async function handler( const queueUrl = process.env.INBOUND_QUEUE_URL; const logGroupName 
= process.env.TRANSFORM_FILTER_LOG_GROUP; const deliveryLogGroupPrefix = process.env.DELIVERY_LOG_GROUP_PREFIX; + const mockWebhookLogGroup = process.env.MOCK_WEBHOOK_LOG_GROUP; + const elasticacheEndpoint = process.env.ELASTICACHE_ENDPOINT; + const elasticacheCacheName = process.env.ELASTICACHE_CACHE_NAME; + const elasticacheIamUsername = process.env.ELASTICACHE_IAM_USERNAME; if (!queueUrl) { throw new Error("Missing required environment variable: INBOUND_QUEUE_URL"); @@ -30,6 +38,16 @@ export async function handler( const sqsClient = new SQSClient({ region }); const cloudWatchClient = new CloudWatchLogsClient({ region }); + const elastiCacheDeps: ElastiCacheDeps | undefined = + elasticacheEndpoint && elasticacheCacheName && elasticacheIamUsername + ? { + endpoint: elasticacheEndpoint, + cacheName: elasticacheCacheName, + iamUsername: elasticacheIamUsername, + region, + } + : undefined; + logger.info("Performance test started", { testId }); try { @@ -40,9 +58,12 @@ export async function handler( queueUrl, logGroupName, deliveryLogGroupPrefix, + mockWebhookLogGroup, }, scenario, testId, + undefined, + elastiCacheDeps, ); logger.info("Performance test completed", { testId }); diff --git a/lambdas/perf-runner-lambda/src/purge.ts b/lambdas/perf-runner-lambda/src/purge.ts new file mode 100644 index 00000000..e363e706 --- /dev/null +++ b/lambdas/perf-runner-lambda/src/purge.ts @@ -0,0 +1,40 @@ +import { PurgeQueueCommand, type SQSClient } from "@aws-sdk/client-sqs"; +import type { Scenario } from "types"; + +export function deriveQueueUrls( + inboundQueueUrl: string, + scenario: Scenario, +): string[] { + // eslint-disable-next-line sonarjs/null-dereference -- String.replace always returns a string + const baseUrl = inboundQueueUrl.replace(/inbound-event-queue$/, ""); + const clientIds = [...new Set(scenario.eventMix.map((e) => e.clientId))]; + + return [ + inboundQueueUrl, + `${baseUrl}inbound-event-dlq-queue`, + ...clientIds.flatMap((id) => [ + 
`${baseUrl}${id}-delivery-queue`, + `${baseUrl}${id}-delivery-dlq-queue`, + ]), + ]; +} + +export async function purgeQueues( + client: SQSClient, + queueUrls: string[], +): Promise { + const results = await Promise.allSettled( + queueUrls.map((url) => + client.send(new PurgeQueueCommand({ QueueUrl: url })), + ), + ); + + for (const result of results) { + if (result.status === "rejected") { + const error = result.reason as { name?: string }; + if (error.name !== "QueueDoesNotExist") { + throw result.reason as Error; + } + } + } +} diff --git a/lambdas/perf-runner-lambda/src/runner.ts b/lambdas/perf-runner-lambda/src/runner.ts index a265e90e..7a5b5ee6 100644 --- a/lambdas/perf-runner-lambda/src/runner.ts +++ b/lambdas/perf-runner-lambda/src/runner.ts @@ -1,13 +1,26 @@ import type { + CircuitBreakerSnapshot, DeliveryMetricsSnapshot, + ElastiCacheDeps, + EndpointRateLimitState, MetricsSnapshot, + PerClientRateTimeline, PerformanceResult, PhaseResult, RunnerDeps, Scenario, + WebhookVerificationResult, } from "types"; import { generatePhaseLoad } from "sqs"; -import { queryDeliveryMetricsSnapshot, queryMetricsSnapshot } from "cloudwatch"; +import { deriveQueueUrls, purgeQueues } from "purge"; +import { dumpRateLimitState, flushElastiCache } from "elasticache"; +import { verifyMockWebhook } from "webhook-verify"; +import { + queryCircuitBreakerSnapshot, + queryDeliveryMetricsSnapshot, + queryMetricsSnapshot, + queryPerClientRateTimeline, +} from "cloudwatch"; const CLOUDWATCH_SETTLING_MS = 60_000; @@ -25,11 +38,56 @@ function buildDeliveryLogGroupNames( return [...clientIds].map((id) => `${prefix}${id}`); } +async function collectSnapshots( + deps: RunnerDeps, + deliveryLogGroupNames: string[], + startSec: number, + endSec: number, + cbStartSec: number, + out: { + snapshots: MetricsSnapshot[]; + deliverySnapshots: DeliveryMetricsSnapshot[]; + cbSnapshots: CircuitBreakerSnapshot[]; + }, +): Promise { + const snap = await queryMetricsSnapshot( + deps.cloudWatchClient, + 
deps.logGroupName, + startSec, + endSec, + ); + if (snap !== null) out.snapshots.push(snap); + + if (deliveryLogGroupNames.length > 0) { + const deliverySnap = await queryDeliveryMetricsSnapshot( + deps.cloudWatchClient, + deliveryLogGroupNames, + startSec, + endSec, + ); + if (deliverySnap !== null) out.deliverySnapshots.push(deliverySnap); + + const cbSnap = await queryCircuitBreakerSnapshot( + deps.cloudWatchClient, + deliveryLogGroupNames, + cbStartSec, + endSec, + ); + if (cbSnap !== null) { + out.cbSnapshots.push(cbSnap); + return endSec; + } + } + + return cbStartSec; +} + export async function runPerformanceTest( deps: RunnerDeps, scenario: Scenario, testId: string, sleepFn: (ms: number) => Promise = defaultSleep, + elastiCacheDeps?: ElastiCacheDeps, ): Promise { if (scenario.eventMix.length === 0) { throw new Error("scenario.eventMix must contain at least one entry"); @@ -49,10 +107,24 @@ export async function runPerformanceTest( } const testStartMs = Date.now(); + + const queueUrls = deriveQueueUrls(deps.queueUrl, scenario); + await purgeQueues(deps.sqsClient, queueUrls); + if (elastiCacheDeps) { + await flushElastiCache(elastiCacheDeps); + } + + let rateLimitStateBefore: EndpointRateLimitState[] | undefined; + if (elastiCacheDeps) { + rateLimitStateBefore = await dumpRateLimitState(elastiCacheDeps); + } + const startedAt = new Date(testStartMs).toISOString(); const phaseResults: PhaseResult[] = []; const snapshots: MetricsSnapshot[] = []; const deliverySnapshots: DeliveryMetricsSnapshot[] = []; + const cbSnapshots: CircuitBreakerSnapshot[] = []; + let lastCbSnapshotSec = Math.floor(testStartMs / 1000); let stopPolling = false; const deliveryLogGroupNames = buildDeliveryLogGroupNames( @@ -60,29 +132,22 @@ export async function runPerformanceTest( scenario, ); + const out = { snapshots, deliverySnapshots, cbSnapshots }; + const pollLoop = async (): Promise => { await sleepFn(scenario.metricsIntervalSecs * 1000); while (!stopPolling) { const startSec = 
Math.floor(testStartMs / 1000); const endSec = Math.floor(Date.now() / 1000); - const snap = await queryMetricsSnapshot( - deps.cloudWatchClient, - deps.logGroupName, + lastCbSnapshotSec = await collectSnapshots( + deps, + deliveryLogGroupNames, startSec, endSec, + lastCbSnapshotSec, + out, ); - if (snap !== null) snapshots.push(snap); - - if (deliveryLogGroupNames.length > 0) { - const deliverySnap = await queryDeliveryMetricsSnapshot( - deps.cloudWatchClient, - deliveryLogGroupNames, - startSec, - endSec, - ); - if (deliverySnap !== null) deliverySnapshots.push(deliverySnap); - } if (!stopPolling) { await sleepFn(scenario.metricsIntervalSecs * 1000); @@ -110,22 +175,53 @@ export async function runPerformanceTest( const finalStartSec = Math.floor(testStartMs / 1000); const finalEndSec = Math.floor(Date.now() / 1000); - const finalSnap = await queryMetricsSnapshot( - deps.cloudWatchClient, - deps.logGroupName, + await collectSnapshots( + deps, + deliveryLogGroupNames, finalStartSec, finalEndSec, + lastCbSnapshotSec, + out, ); - if (finalSnap !== null) snapshots.push(finalSnap); - if (deliveryLogGroupNames.length > 0) { - const finalDeliverySnap = await queryDeliveryMetricsSnapshot( + const perClientRateTimelines: PerClientRateTimeline[] = []; + + if (deps.deliveryLogGroupPrefix) { + const clientIds = [...new Set(scenario.eventMix.map((e) => e.clientId))]; + const timelinePromises = clientIds.map(async (clientId) => { + const logGroupName = `${deps.deliveryLogGroupPrefix}${clientId}`; + const entries = await queryPerClientRateTimeline( + deps.cloudWatchClient, + logGroupName, + finalStartSec, + finalEndSec, + ); + return { clientId, entries }; + }); + const timelines = await Promise.all(timelinePromises); + perClientRateTimelines.push( + ...timelines.filter((t) => t.entries.length > 0), + ); + } + + let webhookVerification: WebhookVerificationResult | undefined; + if (deps.mockWebhookLogGroup) { + webhookVerification = await verifyMockWebhook( deps.cloudWatchClient, 
- deliveryLogGroupNames, + deps.mockWebhookLogGroup, finalStartSec, finalEndSec, ); - if (finalDeliverySnap !== null) deliverySnapshots.push(finalDeliverySnap); + } + + let rateLimitStateAfter: EndpointRateLimitState[] | undefined; + if (elastiCacheDeps) { + rateLimitStateAfter = await dumpRateLimitState(elastiCacheDeps); + } + + await purgeQueues(deps.sqsClient, queueUrls); + if (elastiCacheDeps) { + await flushElastiCache(elastiCacheDeps); } return { @@ -136,5 +232,10 @@ export async function runPerformanceTest( phases: phaseResults, metrics: snapshots, deliveryMetrics: deliverySnapshots, + circuitBreakerMetrics: cbSnapshots, + perClientRateTimelines, + webhookVerification, + rateLimitStateBefore, + rateLimitStateAfter, }; } diff --git a/lambdas/perf-runner-lambda/src/types.ts b/lambdas/perf-runner-lambda/src/types.ts index 5366602d..4415ef63 100644 --- a/lambdas/perf-runner-lambda/src/types.ts +++ b/lambdas/perf-runner-lambda/src/types.ts @@ -55,6 +55,48 @@ export type DeliveryMetricsSnapshot = { p99Ms: number; }; +export type CircuitBreakerSnapshot = { + snapshotAt: number; + intervalStartSec: number; + intervalEndSec: number; + circuitOpenEvents: number; + circuitCloseEvents: number; + admissionDeniedCircuitOpen: number; + admissionDeniedRateLimited: number; + deliveryAttempts: number; + deliverySuccesses: number; + deliveryFailures: number; + deliveryRateLimited: number; +}; + +export type PerClientRateEntry = { + timestampSec: number; + deliveryAttempts: number; +}; + +export type PerClientRateTimeline = { + clientId: string; + entries: PerClientRateEntry[]; +}; + +export type EndpointRateLimitState = { + key: string; + isOpen: string | null; + switchedAt: string | null; + bucketTokens: string | null; + bucketRefilledAt: string | null; + curAttempts: string | null; + prevAttempts: string | null; + curFailures: string | null; + prevFailures: string | null; + sampleTill: string | null; +}; + +export type WebhookVerificationResult = { + receivedCallbacks: 
number; + verified: boolean; +}; + export type PerformanceResult = { testId: string; scenario: Scenario; @@ -63,6 +105,11 @@ export type PerformanceResult = { phases: PhaseResult[]; metrics: MetricsSnapshot[]; deliveryMetrics: DeliveryMetricsSnapshot[]; + circuitBreakerMetrics: CircuitBreakerSnapshot[]; + perClientRateTimelines?: PerClientRateTimeline[]; + webhookVerification?: WebhookVerificationResult; + rateLimitStateBefore?: EndpointRateLimitState[]; + rateLimitStateAfter?: EndpointRateLimitState[]; }; export type PerfRunnerPayload = { @@ -76,4 +123,12 @@ export type RunnerDeps = { queueUrl: string; logGroupName: string; deliveryLogGroupPrefix?: string; + mockWebhookLogGroup?: string; +}; + +export type ElastiCacheDeps = { + endpoint: string; + cacheName: string; + iamUsername: string; + region: string; }; diff --git a/lambdas/perf-runner-lambda/src/webhook-verify.ts b/lambdas/perf-runner-lambda/src/webhook-verify.ts new file mode 100644 index 00000000..77c1fa6d --- /dev/null +++ b/lambdas/perf-runner-lambda/src/webhook-verify.ts @@ -0,0 +1,59 @@ +import { + type CloudWatchLogsClient, + GetQueryResultsCommand, + StartQueryCommand, +} from "@aws-sdk/client-cloudwatch-logs"; +import type { WebhookVerificationResult } from "types"; + +const INSIGHTS_POLL_INTERVAL_MS = 2000; +const INSIGHTS_TIMEOUT_MS = 30_000; + +export async function verifyMockWebhook( + client: CloudWatchLogsClient, + logGroupName: string, + startTimeSec: number, + endTimeSec: number, +): Promise { + const { queryId } = await client.send( + new StartQueryCommand({ + logGroupName, + startTime: startTimeSec, + endTime: endTimeSec, + queryString: [ + 'filter msg = "Callback received"', + "| stats count(*) as callbackCount", + ].join("\n"), + }), + ); + + if (!queryId) { + return { receivedCallbacks: 0, verified: false }; + } + + const deadline = Date.now() + INSIGHTS_TIMEOUT_MS; + + while (Date.now() < deadline) { + await new Promise((resolve) => { + setTimeout(resolve, INSIGHTS_POLL_INTERVAL_MS); 
+ }); + + const response = await client.send(new GetQueryResultsCommand({ queryId })); + + if (response.status === "Failed" || response.status === "Cancelled") { + return { receivedCallbacks: 0, verified: false }; + } + + if (response.status === "Complete") { + const rows = + (response.results as { field?: string; value?: string }[][]) ?? []; + const row = rows[0] ?? []; + const count = Number( + row.find((f) => f.field === "callbackCount")?.value ?? 0, + ); + + return { receivedCallbacks: count, verified: count > 0 }; + } + } + + return { receivedCallbacks: 0, verified: false }; +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c497eafb..f2b2aa3a 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -450,21 +450,30 @@ importers: lambdas/perf-runner-lambda: dependencies: + '@aws-crypto/sha256-js': + specifier: catalog:aws + version: 5.2.0 '@aws-sdk/client-cloudwatch-logs': specifier: catalog:aws version: 3.1026.0 '@aws-sdk/client-sqs': specifier: catalog:aws version: 3.1026.0 + '@aws-sdk/credential-providers': + specifier: catalog:aws + version: 3.1026.0 '@nhs-notify-client-callbacks/logger': specifier: workspace:* version: link:../../src/logger '@nhs-notify-client-callbacks/models': specifier: workspace:* version: link:../../src/models - esbuild: - specifier: catalog:tools - version: 0.28.0 + '@redis/client': + specifier: catalog:app + version: 1.6.1 + '@smithy/signature-v4': + specifier: catalog:aws + version: 5.3.13 devDependencies: '@tsconfig/node22': specifier: catalog:tools @@ -478,6 +487,9 @@ importers: '@types/node': specifier: catalog:tools version: 25.6.0 + esbuild: + specifier: catalog:tools + version: 0.28.0 eslint: specifier: catalog:lint version: 9.39.4(jiti@2.6.1) diff --git a/scripts/tests/integration-debug.sh b/scripts/tests/integration-debug.sh index 15329969..b6882528 100755 --- a/scripts/tests/integration-debug.sh +++ b/scripts/tests/integration-debug.sh @@ -8,6 +8,7 @@ set -euo pipefail # ENVIRONMENT= AWS_PROFILE= make test-integration-debug 
ACTION=queue-status # ENVIRONMENT= AWS_PROFILE= make test-integration-debug ACTION=tail-transform # ENVIRONMENT= AWS_PROFILE= make test-integration-debug ACTION=tail-transform LOG_FILTER= +# ENVIRONMENT= AWS_PROFILE= FOLLOW=false make test-integration-debug ACTION=tail-transform > out.log # # Actions: # queue-status Show SQS queue message counts @@ -28,6 +29,8 @@ set -euo pipefail # # Optional: # LOG_FILTER CloudWatch Logs filter pattern / text +# LOG_SINCE How far back to start tailing logs (default: 30m, e.g. 1h, 2h, 30m) +# FOLLOW Follow logs continuously (default: true, set false to dump and exit) # AWS_REGION (default: eu-west-2) if [[ -z "${ENVIRONMENT:-}" ]]; then @@ -49,7 +52,9 @@ fi REGION="${AWS_REGION:-eu-west-2}" LOG_FILTER="${LOG_FILTER:-}" +LOG_SINCE="${LOG_SINCE:-30m}" CLIENT_ID="${CLIENT_ID:-}" +FOLLOW="${FOLLOW:-true}" if ! aws sts get-caller-identity --profile "$AWS_PROFILE" >/dev/null 2>&1; then echo "No active AWS SSO session for profile '$AWS_PROFILE'. Running aws sso login..." 
@@ -149,18 +154,26 @@ log_filter_args() { return 0 } +follow_args() { + if [[ "$FOLLOW" == "true" ]]; then + printf '%s\n' --follow + fi +} + action_tail_transform() { local -a filter_args=() + local -a follow_arg=() mapfile -t filter_args < <(log_filter_args) + mapfile -t follow_arg < <(follow_args) print_section "Transform/Filter Lambda Logs" aws logs tail \ "/aws/lambda/${PREFIX}-client-transform-filter" \ --region "$REGION" \ --profile "$AWS_PROFILE" \ - --since 30m \ - --follow \ + --since "$LOG_SINCE" \ --format short \ + "${follow_arg[@]}" \ "${filter_args[@]}" return 0 } @@ -168,31 +181,35 @@ action_tail_transform() { action_tail_https_client() { require_client_id local -a filter_args=() + local -a follow_arg=() mapfile -t filter_args < <(log_filter_args) + mapfile -t follow_arg < <(follow_args) print_section "HTTPS Client Lambda Logs" aws logs tail \ "/aws/lambda/${PREFIX}-https-client-${CLIENT_ID}" \ --region "$REGION" \ --profile "$AWS_PROFILE" \ - --since 30m \ - --follow \ + --since "$LOG_SINCE" \ --format short \ + "${follow_arg[@]}" \ "${filter_args[@]}" } action_tail_webhook() { local -a filter_args=() + local -a follow_arg=() mapfile -t filter_args < <(log_filter_args) + mapfile -t follow_arg < <(follow_args) print_section "Mock Webhook Lambda Logs" aws logs tail \ "/aws/lambda/${PREFIX}-mock-webhook" \ --region "$REGION" \ --profile "$AWS_PROFILE" \ - --since 30m \ - --follow \ + --since "$LOG_SINCE" \ --format short \ + "${follow_arg[@]}" \ "${filter_args[@]}" return 0 } @@ -219,14 +236,17 @@ action_tail_pipe() { pipe_log_group_name="${pipe_log_group_arn#*:log-group:}" + local -a follow_arg=() + mapfile -t follow_arg < <(follow_args) + print_section "EventBridge Pipe Logs" aws logs tail \ "$pipe_log_group_name" \ --region "$REGION" \ --profile "$AWS_PROFILE" \ - --since 30m \ - --follow \ + --since "$LOG_SINCE" \ --format short \ + "${follow_arg[@]}" \ "${filter_args[@]}" return 0 } diff --git a/tests/integration/delivery-resilience.test.ts 
b/tests/integration/delivery-resilience.test.ts index 8b218233..e3b2218b 100644 --- a/tests/integration/delivery-resilience.test.ts +++ b/tests/integration/delivery-resilience.test.ts @@ -108,7 +108,7 @@ describe("Delivery Resilience", () => { }); describe("Rate Limiting", () => { - const BURST_SIZE = 15; + const BURST_SIZE = 30; let dlqUrl: string; let deliveryUrl: string; let httpsClientLogGroup: string; @@ -200,9 +200,27 @@ describe("Delivery Resilience", () => { it("should open the circuit breaker after repeated failures and not affect other clients", async () => { const cbConfig = getClientConfig("clientCircuitBreaker"); + const cbTargetPath = buildMockWebhookTargetPath("clientCircuitBreaker"); const singleTargetConfig = getClientConfig("clientSingleTarget"); const singleTargetPath = buildMockWebhookTargetPath("clientSingleTarget"); + // Send a successful message first so the circuit is confirmed closed (it starts half-open) + const warmupEvent = createMessageStatusPublishEvent({ + data: { + clientId: cbConfig.clientId, + messageId: `cb-warmup-${crypto.randomUUID()}`, + }, + }); + await sendSqsEvent(ctx.sqs, ctx.inboundQueueUrl, warmupEvent); + const warmupCallback = await awaitCallback( + ctx.cwLogs, + ctx.webhookLogGroup, + warmupEvent.data.messageId, + "MessageStatus", + ctx.startTime, + ); + expect(warmupCallback.path).toBe(cbTargetPath); + const cbEvents = Array.from({ length: CB_BURST_SIZE }, () => createMessageStatusPublishEvent({ data: { From 7bcb4607a1840c51449716ebbc64b0df3b348d3c Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Wed, 29 Apr 2026 16:07:41 +0100 Subject: [PATCH 30/65] Fix flakey retry policy tests --- .../src/__tests__/retry-policy.test.ts | 33 ++++++++++++------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts b/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts index de828762..c53927fb 100644 --- 
a/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts @@ -48,6 +48,14 @@ describe("jitteredBackoffSeconds", () => { }); describe("parseRetryAfter", () => { + beforeEach(() => { + jest.useFakeTimers({ now: 10_000_000 }); + }); + + afterEach(() => { + jest.useRealTimers(); + }); + it("parses integer string", () => { expect(parseRetryAfter("120")).toBe(120); }); @@ -57,14 +65,12 @@ describe("parseRetryAfter", () => { }); it("parses HTTP date string", () => { - const futureDate = new Date(Date.now() + 60_000); - const result = parseRetryAfter(futureDate.toUTCString()); - expect(result).toBeGreaterThanOrEqual(58); - expect(result).toBeLessThanOrEqual(61); + const futureDate = new Date(10_060_000); + expect(parseRetryAfter(futureDate.toUTCString())).toBe(60); }); it("returns 0 for past HTTP date", () => { - const pastDate = new Date(Date.now() - 60_000); + const pastDate = new Date(9_940_000); expect(parseRetryAfter(pastDate.toUTCString())).toBe(0); }); @@ -74,19 +80,24 @@ describe("parseRetryAfter", () => { }); describe("isWindowExhausted", () => { + beforeEach(() => { + jest.useFakeTimers({ now: 10_000 }); + }); + + afterEach(() => { + jest.useRealTimers(); + }); + it("returns false just below limit", () => { - const firstReceived = Date.now() - 999; - expect(isWindowExhausted(firstReceived, 1000)).toBe(false); + expect(isWindowExhausted(9001, 1000)).toBe(false); }); it("returns true at limit", () => { - const firstReceived = Date.now() - 1000; - expect(isWindowExhausted(firstReceived, 1000)).toBe(true); + expect(isWindowExhausted(9000, 1000)).toBe(true); }); it("returns true beyond limit", () => { - const firstReceived = Date.now() - 2000; - expect(isWindowExhausted(firstReceived, 1000)).toBe(true); + expect(isWindowExhausted(8000, 1000)).toBe(true); }); }); From 0b5cddac6d92a7aa5a9f227b2bc6b9ed94c22e77 Mon Sep 17 00:00:00 2001 From: Rhys Cox Date: Thu, 30 Apr 2026 09:02:27 +0100 Subject: 
[PATCH 31/65] CCM-16073 - Addressed PR feedback --- .../terraform/components/callbacks/README.md | 4 +- .../callbacks/elasticache_delivery_state.tf | 6 +- .../callbacks/module_client_delivery.tf | 7 +- .../callbacks/s3_bucket_mtls_test_certs.tf | 3 + .../components/callbacks/variables.tf | 16 +++- .../modules/client-delivery/README.md | 7 +- .../client-delivery/iam_role_sqs_target.tf | 24 +----- .../module_https_client_lambda.tf | 7 +- .../modules/client-delivery/variables.tf | 18 ++--- lambdas/https-client-lambda/package.json | 1 - .../src/__tests__/delivery-metrics.test.ts | 50 +++++++++--- .../__tests__/delivery-observability.test.ts | 71 ++++++++++++++--- .../src/__tests__/handler.test.ts | 62 +++++++++++++-- .../src/__tests__/tls-agent-factory.test.ts | 76 +++---------------- lambdas/https-client-lambda/src/handler.ts | 36 ++++++--- .../src/services/delivery-metrics.ts | 33 +++++--- .../src/services/delivery-observability.ts | 19 +++-- .../services/delivery/tls-agent-factory.ts | 62 +++------------ pnpm-lock.yaml | 54 ------------- pnpm-workspace.yaml | 1 - .../__tests__/client-config-schema.test.ts | 7 -- 21 files changed, 285 insertions(+), 279 deletions(-) diff --git a/infrastructure/terraform/components/callbacks/README.md b/infrastructure/terraform/components/callbacks/README.md index e090abb9..9889ab22 100644 --- a/infrastructure/terraform/components/callbacks/README.md +++ b/infrastructure/terraform/components/callbacks/README.md @@ -34,7 +34,9 @@ | [log\_level](#input\_log\_level) | The log level to be used in lambda functions within the component. 
Any log with a lower severity than the configured value will not be logged: https://docs.python.org/3/library/logging.html#levels | `string` | `"INFO"` | no | | [log\_retention\_in\_days](#input\_log\_retention\_in\_days) | The retention period in days for the Cloudwatch Logs events to be retained, default of 0 is indefinite | `number` | `0` | no | | [message\_root\_uri](#input\_message\_root\_uri) | The root URI used for constructing message links in callback payloads | `string` | n/a | yes | -| [mtls\_cert\_secret\_arn](#input\_mtls\_cert\_secret\_arn) | Secrets Manager ARN for the shared mTLS client certificate (production) | `string` | `""` | no | +| [mtls\_ca\_s3\_key](#input\_mtls\_ca\_s3\_key) | S3 key for the CA certificate PEM bundle used for server verification | `string` | `""` | no | +| [mtls\_cert\_s3\_bucket](#input\_mtls\_cert\_s3\_bucket) | S3 bucket containing the mTLS client certificate bundle | `string` | `""` | no | +| [mtls\_cert\_s3\_key](#input\_mtls\_cert\_s3\_key) | S3 key for the mTLS client certificate PEM bundle | `string` | `""` | no | | [parent\_acct\_environment](#input\_parent\_acct\_environment) | Name of the environment responsible for the acct resources used, affects things like DNS zone. Useful for named dev environments | `string` | `"main"` | no | | [pipe\_event\_patterns](#input\_pipe\_event\_patterns) | value | `list(string)` | `[]` | no | | [pipe\_log\_level](#input\_pipe\_log\_level) | Log level for the EventBridge Pipe. 
| `string` | `"ERROR"` | no | diff --git a/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf b/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf index 6b5d3da1..2d7fd948 100644 --- a/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf +++ b/infrastructure/terraform/components/callbacks/elasticache_delivery_state.tf @@ -128,10 +128,10 @@ resource "aws_vpc_security_group_egress_rule" "lambda_to_elasticache" { resource "aws_vpc_security_group_egress_rule" "lambda_to_https" { security_group_id = aws_security_group.https_client_lambda.id cidr_ipv4 = "0.0.0.0/0" - from_port = 0 - to_port = 65535 + from_port = 443 + to_port = 443 ip_protocol = "tcp" - description = "Allow Lambda outbound TCP for HTTPS webhook delivery (port defined per-client in webhook URL)" + description = "Allow Lambda outbound HTTPS for webhook delivery" tags = local.default_tags } diff --git a/infrastructure/terraform/components/callbacks/module_client_delivery.tf b/infrastructure/terraform/components/callbacks/module_client_delivery.tf index 5122606e..cce31bd5 100644 --- a/infrastructure/terraform/components/callbacks/module_client_delivery.tf +++ b/infrastructure/terraform/components/callbacks/module_client_delivery.tf @@ -36,10 +36,9 @@ module "client_delivery" { elasticache_cache_name = aws_elasticache_serverless_cache.delivery_state.name elasticache_iam_username = "${var.project}-${var.environment}-${var.component}-elasticache-user" - mtls_cert_secret_arn = var.mtls_cert_secret_arn - mtls_test_cert_s3_bucket = var.deploy_mock_clients ? 
module.mtls_test_certs_bucket[0].bucket : "" - mtls_test_cert_s3_key = local.mtls_test_cert_s3_key # gitleaks:allow - mtls_test_ca_s3_key = local.mtls_test_ca_s3_key # gitleaks:allow + mtls_cert_s3_bucket = local.mtls_cert_s3_bucket + mtls_cert_s3_key = local.mtls_cert_s3_key # gitleaks:allow + mtls_ca_s3_key = local.mtls_ca_s3_key # gitleaks:allow token_bucket_burst_capacity = var.token_bucket_burst_capacity diff --git a/infrastructure/terraform/components/callbacks/s3_bucket_mtls_test_certs.tf b/infrastructure/terraform/components/callbacks/s3_bucket_mtls_test_certs.tf index e1bd377e..c29806cd 100644 --- a/infrastructure/terraform/components/callbacks/s3_bucket_mtls_test_certs.tf +++ b/infrastructure/terraform/components/callbacks/s3_bucket_mtls_test_certs.tf @@ -61,6 +61,9 @@ locals { mtls_test_certs_s3_prefix = "callbacks/mtls-test" mtls_test_cert_s3_key = "${local.mtls_test_certs_s3_prefix}/client-bundle.pem" mtls_test_ca_s3_key = "${local.mtls_test_certs_s3_prefix}/ca.pem" + mtls_cert_s3_bucket = var.deploy_mock_clients ? module.mtls_test_certs_bucket[0].bucket : var.mtls_cert_s3_bucket + mtls_cert_s3_key = var.deploy_mock_clients ? local.mtls_test_cert_s3_key : var.mtls_cert_s3_key # gitleaks:allow + mtls_ca_s3_key = var.deploy_mock_clients ? 
local.mtls_test_ca_s3_key : var.mtls_ca_s3_key # gitleaks:allow } # --- TLS provider: generate test CA, client, and server certificates --- diff --git a/infrastructure/terraform/components/callbacks/variables.tf b/infrastructure/terraform/components/callbacks/variables.tf index aef32373..68e4eafd 100644 --- a/infrastructure/terraform/components/callbacks/variables.tf +++ b/infrastructure/terraform/components/callbacks/variables.tf @@ -184,9 +184,21 @@ variable "s3_enable_force_destroy" { default = false } -variable "mtls_cert_secret_arn" { +variable "mtls_cert_s3_bucket" { type = string - description = "Secrets Manager ARN for the shared mTLS client certificate (production)" + description = "S3 bucket containing the mTLS client certificate bundle" + default = "" +} + +variable "mtls_cert_s3_key" { + type = string + description = "S3 key for the mTLS client certificate PEM bundle" + default = "" +} + +variable "mtls_ca_s3_key" { + type = string + description = "S3 key for the CA certificate PEM bundle used for server verification" default = "" } diff --git a/infrastructure/terraform/modules/client-delivery/README.md b/infrastructure/terraform/modules/client-delivery/README.md index 2036c60d..22b98e26 100644 --- a/infrastructure/terraform/modules/client-delivery/README.md +++ b/infrastructure/terraform/modules/client-delivery/README.md @@ -35,10 +35,9 @@ No requirements. 
| [log\_retention\_in\_days](#input\_log\_retention\_in\_days) | CloudWatch log retention period in days | `number` | `0` | no | | [log\_subscription\_role\_arn](#input\_log\_subscription\_role\_arn) | IAM role ARN for CloudWatch log subscription | `string` | `""` | no | | [max\_retry\_duration\_seconds](#input\_max\_retry\_duration\_seconds) | Maximum retry window before messages are sent to DLQ | `number` | `7200` | no | -| [mtls\_cert\_secret\_arn](#input\_mtls\_cert\_secret\_arn) | Secrets Manager ARN for the mTLS client certificate | `string` | `""` | no | -| [mtls\_test\_ca\_s3\_key](#input\_mtls\_test\_ca\_s3\_key) | S3 key for dev CA certificate PEM bundle used for server verification | `string` | `""` | no | -| [mtls\_test\_cert\_s3\_bucket](#input\_mtls\_test\_cert\_s3\_bucket) | S3 bucket for dev mTLS test certificates | `string` | `""` | no | -| [mtls\_test\_cert\_s3\_key](#input\_mtls\_test\_cert\_s3\_key) | S3 key for dev mTLS test certificate bundle | `string` | `""` | no | +| [mtls\_ca\_s3\_key](#input\_mtls\_ca\_s3\_key) | S3 key for the CA certificate PEM bundle used for server verification | `string` | `""` | no | +| [mtls\_cert\_s3\_bucket](#input\_mtls\_cert\_s3\_bucket) | S3 bucket containing the mTLS client certificate bundle | `string` | `""` | no | +| [mtls\_cert\_s3\_key](#input\_mtls\_cert\_s3\_key) | S3 key for the mTLS client certificate PEM bundle | `string` | `""` | no | | [project](#input\_project) | The name of the tfscaffold project | `string` | n/a | yes | | [region](#input\_region) | AWS Region | `string` | n/a | yes | | [sqs\_max\_receive\_count](#input\_sqs\_max\_receive\_count) | Safety-net maximum receive count before a message moves to DLQ. Supplements the time-based retry window for cases where the Lambda fails before reaching the window check. 
| `number` | `100` | no | diff --git a/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf b/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf index 55162684..2f8e3c28 100644 --- a/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf +++ b/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf @@ -100,25 +100,9 @@ data "aws_iam_policy_document" "https_client_lambda" { } dynamic "statement" { - for_each = var.mtls_cert_secret_arn != "" ? [1] : [] + for_each = var.mtls_cert_s3_bucket != "" ? [1] : [] content { - sid = "SecretsManagerMTLSCert" - effect = "Allow" - - actions = [ - "secretsmanager:GetSecretValue", - ] - - resources = [ - var.mtls_cert_secret_arn, - ] - } - } - - dynamic "statement" { - for_each = var.mtls_test_cert_s3_bucket != "" ? [1] : [] - content { - sid = "S3MTLSTestCertReadAccess" + sid = "S3MTLSCertReadAccess" effect = "Allow" actions = [ @@ -126,8 +110,8 @@ data "aws_iam_policy_document" "https_client_lambda" { ] resources = [ - "arn:aws:s3:::${var.mtls_test_cert_s3_bucket}/${var.mtls_test_cert_s3_key}", - "arn:aws:s3:::${var.mtls_test_cert_s3_bucket}/${var.mtls_test_ca_s3_key}", + "arn:aws:s3:::${var.mtls_cert_s3_bucket}/${var.mtls_cert_s3_key}", + "arn:aws:s3:::${var.mtls_cert_s3_bucket}/${var.mtls_ca_s3_key}", ] } } diff --git a/infrastructure/terraform/modules/client-delivery/module_https_client_lambda.tf b/infrastructure/terraform/modules/client-delivery/module_https_client_lambda.tf index 0021fb80..a1bb48f2 100644 --- a/infrastructure/terraform/modules/client-delivery/module_https_client_lambda.tf +++ b/infrastructure/terraform/modules/client-delivery/module_https_client_lambda.tf @@ -48,10 +48,9 @@ module "https_client_lambda" { ENVIRONMENT = var.environment MAX_RETRY_DURATION_SECONDS = tostring(var.max_retry_duration_seconds) METRICS_NAMESPACE = "nhs-notify-client-callbacks" - MTLS_CERT_SECRET_ARN = var.mtls_cert_secret_arn - MTLS_TEST_CA_S3_KEY = 
var.mtls_test_ca_s3_key # gitleaks:allow - MTLS_TEST_CERT_S3_BUCKET = var.mtls_test_cert_s3_bucket - MTLS_TEST_CERT_S3_KEY = var.mtls_test_cert_s3_key # gitleaks:allow + MTLS_CA_S3_KEY = var.mtls_ca_s3_key # gitleaks:allow + MTLS_CERT_S3_BUCKET = var.mtls_cert_s3_bucket + MTLS_CERT_S3_KEY = var.mtls_cert_s3_key # gitleaks:allow QUEUE_URL = module.sqs_delivery.sqs_queue_url TOKEN_BUCKET_BURST_CAPACITY = tostring(var.token_bucket_burst_capacity) } diff --git a/infrastructure/terraform/modules/client-delivery/variables.tf b/infrastructure/terraform/modules/client-delivery/variables.tf index 801ca291..46f66f45 100644 --- a/infrastructure/terraform/modules/client-delivery/variables.tf +++ b/infrastructure/terraform/modules/client-delivery/variables.tf @@ -157,27 +157,21 @@ variable "enable_xray_tracing" { default = false } -variable "mtls_cert_secret_arn" { +variable "mtls_cert_s3_bucket" { type = string - description = "Secrets Manager ARN for the mTLS client certificate" + description = "S3 bucket containing the mTLS client certificate bundle" default = "" } -variable "mtls_test_cert_s3_bucket" { +variable "mtls_cert_s3_key" { type = string - description = "S3 bucket for dev mTLS test certificates" + description = "S3 key for the mTLS client certificate PEM bundle" default = "" } -variable "mtls_test_cert_s3_key" { +variable "mtls_ca_s3_key" { type = string - description = "S3 key for dev mTLS test certificate bundle" - default = "" -} - -variable "mtls_test_ca_s3_key" { - type = string - description = "S3 key for dev CA certificate PEM bundle used for server verification" + description = "S3 key for the CA certificate PEM bundle used for server verification" default = "" } diff --git a/lambdas/https-client-lambda/package.json b/lambdas/https-client-lambda/package.json index bc08ca4b..88b36769 100644 --- a/lambdas/https-client-lambda/package.json +++ b/lambdas/https-client-lambda/package.json @@ -2,7 +2,6 @@ "dependencies": { "@aws-crypto/sha256-js": "catalog:aws", 
"@aws-sdk/client-s3": "catalog:aws", - "@aws-sdk/client-secrets-manager": "catalog:aws", "@aws-sdk/client-sqs": "catalog:aws", "@aws-sdk/client-ssm": "catalog:aws", "@aws-sdk/credential-providers": "catalog:aws", diff --git a/lambdas/https-client-lambda/src/__tests__/delivery-metrics.test.ts b/lambdas/https-client-lambda/src/__tests__/delivery-metrics.test.ts index 249b0148..635a3666 100644 --- a/lambdas/https-client-lambda/src/__tests__/delivery-metrics.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/delivery-metrics.test.ts @@ -20,11 +20,13 @@ describe("delivery-metrics", () => { mockCreateMetricsLogger.mockReturnValue(mockMetrics); process.env.METRICS_NAMESPACE = "TestNamespace"; process.env.ENVIRONMENT = "test"; + process.env.CLIENT_ID = "client-1"; }); afterEach(() => { delete process.env.METRICS_NAMESPACE; delete process.env.ENVIRONMENT; + delete process.env.CLIENT_ID; }); it("throws when METRICS_NAMESPACE is not set", async () => { @@ -47,6 +49,16 @@ describe("delivery-metrics", () => { ); }); + it("throws when CLIENT_ID is not set", async () => { + delete process.env.CLIENT_ID; + // @ts-expect-error -- modulePaths resolves at runtime + const { emitDeliveryAttempt } = await import("services/delivery-metrics"); + + expect(() => emitDeliveryAttempt("t-1")).toThrow( + "CLIENT_ID environment variable is not set", + ); + }); + it("creates metrics logger with correct namespace and dimensions", async () => { // @ts-expect-error -- modulePaths resolves at runtime const { emitDeliveryAttempt } = await import("services/delivery-metrics"); @@ -56,6 +68,7 @@ describe("delivery-metrics", () => { expect(mockMetrics.setNamespace).toHaveBeenCalledWith("TestNamespace"); expect(mockMetrics.setDimensions).toHaveBeenCalledWith({ Environment: "test", + ClientId: "client-1", }); }); @@ -146,15 +159,15 @@ describe("delivery-metrics", () => { ); }); - it("emitRateLimited emits correct metric", async () => { + it("emitServerRateLimited emits correct metric", async () => { // 
@ts-expect-error -- modulePaths resolves at runtime const mod = await import("services/delivery-metrics"); - const { emitRateLimited } = mod; + const { emitServerRateLimited } = mod; - emitRateLimited("target-42"); + emitServerRateLimited("target-42"); expect(mockMetrics.putMetric).toHaveBeenCalledWith( - "DeliveryRateLimited", + "DeliveryServerRateLimited", 1, "Count", 1, @@ -191,24 +204,39 @@ describe("delivery-metrics", () => { ); }); - it("emitAdmissionDenied emits correct metric with reason", async () => { + it("emitClientRateLimited emits correct metric", async () => { // @ts-expect-error -- modulePaths resolves at runtime const mod = await import("services/delivery-metrics"); - const { emitAdmissionDenied } = mod; + const { emitClientRateLimited } = mod; - emitAdmissionDenied("target-42", "rate_limited"); + emitClientRateLimited("target-42", 3); expect(mockMetrics.setProperty).toHaveBeenCalledWith( "targetId", "target-42", ); + expect(mockMetrics.putMetric).toHaveBeenCalledWith( + "DeliveryRateLimited", + 3, + "Count", + 1, + ); + }); + + it("emitCircuitBlocked emits correct metric", async () => { + // @ts-expect-error -- modulePaths resolves at runtime + const mod = await import("services/delivery-metrics"); + const { emitCircuitBlocked } = mod; + + emitCircuitBlocked("target-42", 2); + expect(mockMetrics.setProperty).toHaveBeenCalledWith( - "reason", - "rate_limited", + "targetId", + "target-42", ); expect(mockMetrics.putMetric).toHaveBeenCalledWith( - "AdmissionDenied", - 1, + "DeliveryCircuitBlocked", + 2, "Count", 1, ); diff --git a/lambdas/https-client-lambda/src/__tests__/delivery-observability.test.ts b/lambdas/https-client-lambda/src/__tests__/delivery-observability.test.ts index 2c0c0543..b1592a7b 100644 --- a/lambdas/https-client-lambda/src/__tests__/delivery-observability.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/delivery-observability.test.ts @@ -20,15 +20,16 @@ jest.mock("@nhs-notify-client-callbacks/logger", () => ({ })); 
jest.mock("services/delivery-metrics", () => ({ - emitAdmissionDenied: jest.fn(), + emitCircuitBlocked: jest.fn(), emitCircuitBreakerClosed: jest.fn(), emitCircuitBreakerOpen: jest.fn(), + emitClientRateLimited: jest.fn(), emitDeliveryAttempt: jest.fn(), emitDeliveryDuration: jest.fn(), emitDeliveryFailure: jest.fn(), emitDeliveryPermanentFailure: jest.fn(), emitDeliverySuccess: jest.fn(), - emitRateLimited: jest.fn(), + emitServerRateLimited: jest.fn(), emitRetryWindowExhausted: jest.fn(), })); @@ -98,15 +99,44 @@ describe("delivery-observability", () => { ); }); + it("recordDeliveryPermanentFailure includes statusCode and errorCode when provided", () => { + const { emitDeliveryPermanentFailure } = jest.requireMock( + "services/delivery-metrics", + ); + const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); + + recordDeliveryPermanentFailure( + "client-1", + "target-1", + 400, + "INVALID_PAYLOAD", + "msg-456", + ); + + expect(emitDeliveryPermanentFailure).toHaveBeenCalledWith("target-1"); + expect(logger.warn).toHaveBeenCalledWith( + "Permanent delivery failure \u2014 sending to DLQ", + expect.objectContaining({ + clientId: "client-1", + targetId: "target-1", + correlationId: "msg-456", + statusCode: 400, + errorCode: "INVALID_PAYLOAD", + }), + ); + }); + it("recordDeliveryRateLimited emits metric and logs", () => { - const { emitRateLimited } = jest.requireMock("services/delivery-metrics"); + const { emitServerRateLimited } = jest.requireMock( + "services/delivery-metrics", + ); const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); recordDeliveryRateLimited("client-1", "target-1", "msg-123"); - expect(emitRateLimited).toHaveBeenCalledWith("target-1"); + expect(emitServerRateLimited).toHaveBeenCalledWith("target-1"); expect(logger.info).toHaveBeenCalledWith( - "Rate limited (429)", + "Server rate limited (429)", expect.objectContaining({ clientId: "client-1", targetId: "target-1", @@ -192,8 +222,8 @@ 
describe("delivery-observability", () => { ); }); - it("recordAdmissionDenied emits per-record metrics and logs", () => { - const { emitAdmissionDenied } = jest.requireMock( + it("recordAdmissionDenied emits rate limited metric for rate_limited reason", () => { + const { emitClientRateLimited } = jest.requireMock( "services/delivery-metrics", ); const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); @@ -203,11 +233,7 @@ describe("delivery-observability", () => { "msg-b", ]); - expect(emitAdmissionDenied).toHaveBeenCalledWith( - "target-1", - "rate_limited", - 2, - ); + expect(emitClientRateLimited).toHaveBeenCalledWith("target-1", 2); expect(logger.warn).toHaveBeenCalledWith( "Admission denied", expect.objectContaining({ @@ -220,6 +246,27 @@ describe("delivery-observability", () => { ); }); + it("recordAdmissionDenied emits circuit blocked metric for circuit_open reason", () => { + const { emitCircuitBlocked } = jest.requireMock( + "services/delivery-metrics", + ); + const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); + + recordAdmissionDenied("client-1", "target-1", "circuit_open", ["msg-a"]); + + expect(emitCircuitBlocked).toHaveBeenCalledWith("target-1", 1); + expect(logger.warn).toHaveBeenCalledWith( + "Admission denied", + expect.objectContaining({ + clientId: "client-1", + targetId: "target-1", + reason: "circuit_open", + deniedCount: 1, + correlationIds: ["msg-a"], + }), + ); + }); + it("recordDeliveryDuration emits metric", () => { const { emitDeliveryDuration } = jest.requireMock( "services/delivery-metrics", diff --git a/lambdas/https-client-lambda/src/__tests__/handler.test.ts b/lambdas/https-client-lambda/src/__tests__/handler.test.ts index 7121cbeb..5e4dade3 100644 --- a/lambdas/https-client-lambda/src/__tests__/handler.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/handler.test.ts @@ -3,6 +3,7 @@ import { DEFAULT_TARGET, makeRecord, } from "__tests__/fixtures/handler-fixtures"; +import { 
VisibilityManagedError } from "services/visibility-managed-error"; jest.mock("@nhs-notify-client-callbacks/logger", () => ({ logger: { @@ -108,7 +109,7 @@ describe("processRecords", () => { mockJitteredBackoff.mockReturnValue(5); mockIsWindowExhausted.mockReturnValue(false); mockHandleRateLimitedRecord.mockRejectedValue( - new Error("Rate limited — requeue"), + new VisibilityManagedError("Rate limited — requeue"), ); mockGetRedisClient.mockResolvedValue({}); mockAdmit.mockResolvedValue({ @@ -248,16 +249,29 @@ describe("processRecords", () => { const failures = await processRecords([record1, record2]); - expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); - expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 5); + expect(failures).toEqual([]); + expect(mockSendToDlq).toHaveBeenCalledWith(record1.body); }); - it("applies jittered backoff cooldown on unexpected errors", async () => { + it("sends unhandled errors to DLQ", async () => { mockDeliverPayload.mockRejectedValue(new Error("Infrastructure error")); const failures = await processRecords([makeRecord()]); + expect(failures).toEqual([]); + expect(mockSendToDlq).toHaveBeenCalledWith(makeRecord().body); + expect(mockChangeVisibility).not.toHaveBeenCalled(); + }); + + it("retries VisibilityManagedError without DLQ", async () => { + mockDeliverPayload.mockRejectedValue( + new VisibilityManagedError("Rate limited — requeue"), + ); + + const failures = await processRecords([makeRecord()]); + expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); + expect(mockSendToDlq).not.toHaveBeenCalled(); expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 5); }); @@ -399,11 +413,49 @@ describe("processRecords", () => { 10, false, 1, - expect.any(Object), + expect.objectContaining({ burstCapacity: 50 }), ); expect(mockDeliverPayload).toHaveBeenCalled(); }); + it("computes burst capacity as invocationRateLimit * 5", async () => { + const targetHighRate = { + ...DEFAULT_TARGET, + invocationRateLimit: 100, 
+ }; + mockLoadTargetConfig.mockResolvedValue(targetHighRate); + + await processRecords([makeRecord()]); + + expect(mockAdmit).toHaveBeenCalledWith( + expect.anything(), + "target-1", + 100, + false, + 1, + expect.objectContaining({ burstCapacity: 500 }), + ); + }); + + it("caps burst capacity at TOKEN_BUCKET_BURST_CAPACITY", async () => { + const targetVeryHighRate = { + ...DEFAULT_TARGET, + invocationRateLimit: 1000, + }; + mockLoadTargetConfig.mockResolvedValue(targetVeryHighRate); + + await processRecords([makeRecord()]); + + expect(mockAdmit).toHaveBeenCalledWith( + expect.anything(), + "target-1", + 1000, + false, + 1, + expect.objectContaining({ burstCapacity: 2250 }), + ); + }); + it("calls recordResult with batch counts on successful delivery when CB enabled", async () => { const targetCb = { ...DEFAULT_TARGET, diff --git a/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts b/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts index fae8112f..95de2d6d 100644 --- a/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts @@ -9,17 +9,6 @@ jest.mock("@aws-sdk/client-s3", () => { }; }); -const mockSecretsManagerSend = jest.fn(); -jest.mock("@aws-sdk/client-secrets-manager", () => { - const actual = jest.requireActual("@aws-sdk/client-secrets-manager"); - return { - ...actual, - SecretsManagerClient: jest - .fn() - .mockImplementation(() => ({ send: mockSecretsManagerSend })), - }; -}); - jest.mock("@nhs-notify-client-callbacks/logger", () => ({ logger: { info: jest.fn(), @@ -96,10 +85,9 @@ describe("tls-agent-factory", () => { beforeEach(async () => { jest.resetModules(); - delete process.env.MTLS_CERT_SECRET_ARN; - process.env.MTLS_TEST_CERT_S3_BUCKET = "test-certs-bucket"; - process.env.MTLS_TEST_CERT_S3_KEY = "client.pem"; - delete process.env.MTLS_TEST_CA_S3_KEY; + process.env.MTLS_CERT_S3_BUCKET = "test-certs-bucket"; + 
process.env.MTLS_CERT_S3_KEY = "client.pem"; + delete process.env.MTLS_CA_S3_KEY; process.env.CERT_EXPIRY_THRESHOLD_MS = "86400000"; // @ts-expect-error -- modulePaths resolves at runtime @@ -108,7 +96,6 @@ describe("tls-agent-factory", () => { resetCache = mod.resetCache; mockS3Send.mockReset(); - mockSecretsManagerSend.mockReset(); }); it("builds agent with key and cert when mtls is enabled", async () => { @@ -126,11 +113,10 @@ describe("tls-agent-factory", () => { expect(agent).toBeDefined(); expect(mockS3Send).not.toHaveBeenCalled(); - expect(mockSecretsManagerSend).not.toHaveBeenCalled(); }); - it("loads test CA for server trust when MTLS_TEST_CA_S3_KEY is set and mtls is disabled", async () => { - process.env.MTLS_TEST_CA_S3_KEY = "test-ca.pem"; + it("loads CA for server trust when MTLS_CA_S3_KEY is set and mtls is disabled", async () => { + process.env.MTLS_CA_S3_KEY = "test-ca.pem"; jest.resetModules(); // @ts-expect-error -- modulePaths resolves at runtime const mod = await import("services/delivery/tls-agent-factory"); @@ -157,8 +143,8 @@ describe("tls-agent-factory", () => { expect(agent.options.cert).toBeUndefined(); }); - it("loads test CA when MTLS_TEST_CA_S3_KEY is set", async () => { - process.env.MTLS_TEST_CA_S3_KEY = "test-ca.pem"; + it("loads CA when MTLS_CA_S3_KEY is set", async () => { + process.env.MTLS_CA_S3_KEY = "test-ca.pem"; jest.resetModules(); // @ts-expect-error -- modulePaths resolves at runtime const mod = await import("services/delivery/tls-agent-factory"); @@ -183,32 +169,11 @@ describe("tls-agent-factory", () => { expect(mockS3Send).toHaveBeenCalledTimes(2); }); - it("loads cert from S3 in non-production", async () => { + it("loads cert from S3", async () => { mockS3PemResponse(COMBINED_PEM); await buildAgent(createTarget({ delivery: { mtls: { enabled: true } } })); expect(mockS3Send).toHaveBeenCalledTimes(1); - expect(mockSecretsManagerSend).not.toHaveBeenCalled(); - }); - - it("loads cert from SecretsManager in production", 
async () => { - process.env.MTLS_CERT_SECRET_ARN = - "arn:aws:secretsmanager:eu-west-2:123:secret:mtls-cert"; - jest.resetModules(); - // @ts-expect-error -- modulePaths resolves at runtime - const mod = await import("services/delivery/tls-agent-factory"); - - mockSecretsManagerSend.mockResolvedValue({ - SecretString: JSON.stringify({ key: TEST_KEY, cert: TEST_CERT }), - }); - - const agent = await mod.buildAgent( - createTarget({ delivery: { mtls: { enabled: true } } }), - ); - - expect(agent).toBeDefined(); - expect(mockSecretsManagerSend).toHaveBeenCalledTimes(1); - expect(mockS3Send).not.toHaveBeenCalled(); }); it("caches cert material on subsequent calls", async () => { @@ -240,32 +205,16 @@ describe("tls-agent-factory", () => { expect(mockS3Send).toHaveBeenCalledTimes(2); }); - it("throws when SecretsManager returns empty SecretString", async () => { - process.env.MTLS_CERT_SECRET_ARN = - "arn:aws:secretsmanager:eu-west-2:123:secret:mtls-cert"; - jest.resetModules(); - // @ts-expect-error -- modulePaths resolves at runtime - const mod = await import("services/delivery/tls-agent-factory"); - - mockSecretsManagerSend.mockResolvedValue({ SecretString: undefined }); - - await expect( - mod.buildAgent(createTarget({ delivery: { mtls: { enabled: true } } })), - ).rejects.toThrow("mTLS cert secret has no value"); - }); - - it("throws when S3 env vars are missing in non-production", async () => { - delete process.env.MTLS_TEST_CERT_S3_BUCKET; - delete process.env.MTLS_TEST_CERT_S3_KEY; + it("throws when S3 env vars are missing", async () => { + delete process.env.MTLS_CERT_S3_BUCKET; + delete process.env.MTLS_CERT_S3_KEY; jest.resetModules(); // @ts-expect-error -- modulePaths resolves at runtime const mod = await import("services/delivery/tls-agent-factory"); await expect( mod.buildAgent(createTarget({ delivery: { mtls: { enabled: true } } })), - ).rejects.toThrow( - "MTLS_TEST_CERT_S3_BUCKET and MTLS_TEST_CERT_S3_KEY are required", - ); + 
).rejects.toThrow("MTLS_CERT_S3_BUCKET and MTLS_CERT_S3_KEY are required"); }); it("throws when S3 object body is empty", async () => { @@ -392,7 +341,6 @@ describe("tls-agent-factory", () => { expect(agent).toBeDefined(); expect(mockS3Send).not.toHaveBeenCalled(); - expect(mockSecretsManagerSend).not.toHaveBeenCalled(); }); it("throws when certPinning.enabled is true but spkiHash is missing", async () => { diff --git a/lambdas/https-client-lambda/src/handler.ts b/lambdas/https-client-lambda/src/handler.ts index 19420462..395815ad 100644 --- a/lambdas/https-client-lambda/src/handler.ts +++ b/lambdas/https-client-lambda/src/handler.ts @@ -19,6 +19,7 @@ import { isWindowExhausted, jitteredBackoffSeconds, } from "services/delivery/retry-policy"; +import { VisibilityManagedError } from "services/visibility-managed-error"; import { type EndpointGateConfig, admit, @@ -43,10 +44,14 @@ type RedisClientType = Awaited>; const DEFAULT_MAX_RETRY_DURATION_MS = 7_200_000; // 2 hours const DEFAULT_CONCURRENCY_LIMIT = 5; +const BURST_MULTIPLIER = 5; +const MAX_BURST_CAPACITY = Number( + process.env.TOKEN_BUCKET_BURST_CAPACITY ?? "2250", +); const gateConfig: EndpointGateConfig = { - // Max tokens the bucket can hold — absorbs short traffic bursts without throttling (default: 2250) - burstCapacity: Number(process.env.TOKEN_BUCKET_BURST_CAPACITY ?? "2250"), + // Max tokens the bucket can hold — absorbs short traffic bursts without throttling + burstCapacity: MAX_BURST_CAPACITY, // Probe rate to test endpoint recovery when half-open (default: 1/60 req/s) probeRateLimit: Number(process.env.CB_PROBE_RATE_LIMIT ?? String(1 / 60)), // Linear ramp-up after circuit closes, avoids flooding a freshly recovered endpoint (default: 10 min) @@ -226,13 +231,18 @@ async function processTargetBatch( const target = await loadTargetConfig(clientId, batch.targetId); const cbEnabled = target.delivery?.circuitBreaker?.enabled ?? 
false; + const targetBurstCapacity = Math.min( + target.invocationRateLimit * BURST_MULTIPLIER, + MAX_BURST_CAPACITY, + ); + const gateResult = await admit( redis, batch.targetId, target.invocationRateLimit, cbEnabled, batch.records.length, - gateConfig, + { ...gateConfig, burstCapacity: targetBurstCapacity }, ); if (!gateResult.allowed) { @@ -276,12 +286,20 @@ async function processTargetBatch( correlationId, err: error, }); - const receiveCount = Number(record.attributes.ApproximateReceiveCount); - await changeVisibility( - record.receiptHandle, - jitteredBackoffSeconds(receiveCount), - ); - return { record, success: false, dlq: false }; + + if (error instanceof VisibilityManagedError) { + const receiveCount = Number( + record.attributes.ApproximateReceiveCount, + ); + await changeVisibility( + record.receiptHandle, + jitteredBackoffSeconds(receiveCount), + ); + return { record, success: false, dlq: false }; + } + + await sendToDlq(record.body); + return { record, success: true, dlq: true }; } }, { concurrency: concurrencyLimit }, diff --git a/lambdas/https-client-lambda/src/services/delivery-metrics.ts b/lambdas/https-client-lambda/src/services/delivery-metrics.ts index dcefaf57..ff32c751 100644 --- a/lambdas/https-client-lambda/src/services/delivery-metrics.ts +++ b/lambdas/https-client-lambda/src/services/delivery-metrics.ts @@ -14,6 +14,7 @@ function getMetrics(): MetricsLogger { const namespace = process.env.METRICS_NAMESPACE; const environment = process.env.ENVIRONMENT; + const clientId = process.env.CLIENT_ID; if (!namespace) { throw new Error("METRICS_NAMESPACE environment variable is not set"); @@ -21,10 +22,16 @@ function getMetrics(): MetricsLogger { if (!environment) { throw new Error("ENVIRONMENT environment variable is not set"); } + if (!clientId) { + throw new Error("CLIENT_ID environment variable is not set"); + } metricsInstance = createMetricsLogger(); metricsInstance.setNamespace(namespace); - metricsInstance.setDimensions({ Environment: 
environment }); + metricsInstance.setDimensions({ + Environment: environment, + ClientId: clientId, + }); return metricsInstance; } @@ -58,11 +65,11 @@ export function emitDeliveryPermanentFailure(targetId: string): void { ); } -export function emitRateLimited(targetId: string): void { +export function emitServerRateLimited(targetId: string): void { const metrics = getMetrics(); metrics.setProperty("targetId", targetId); metrics.putMetric( - "DeliveryRateLimited", + "DeliveryServerRateLimited", 1, Unit.Count, StorageResolution.High, @@ -102,16 +109,22 @@ export function emitRetryWindowExhausted(targetId: string): void { ); } -export function emitAdmissionDenied( - targetId: string, - reason: string, - count = 1, -): void { +export function emitClientRateLimited(targetId: string, count = 1): void { + const metrics = getMetrics(); + metrics.setProperty("targetId", targetId); + metrics.putMetric( + "DeliveryRateLimited", + count, + Unit.Count, + StorageResolution.High, + ); +} + +export function emitCircuitBlocked(targetId: string, count = 1): void { const metrics = getMetrics(); metrics.setProperty("targetId", targetId); - metrics.setProperty("reason", reason); metrics.putMetric( - "AdmissionDenied", + "DeliveryCircuitBlocked", count, Unit.Count, StorageResolution.High, diff --git a/lambdas/https-client-lambda/src/services/delivery-observability.ts b/lambdas/https-client-lambda/src/services/delivery-observability.ts index d722b851..f61807b0 100644 --- a/lambdas/https-client-lambda/src/services/delivery-observability.ts +++ b/lambdas/https-client-lambda/src/services/delivery-observability.ts @@ -1,15 +1,16 @@ import { logger } from "@nhs-notify-client-callbacks/logger"; import { - emitAdmissionDenied, + emitCircuitBlocked, emitCircuitBreakerClosed, emitCircuitBreakerOpen, + emitClientRateLimited, emitDeliveryAttempt, emitDeliveryDuration, emitDeliveryFailure, emitDeliveryPermanentFailure, emitDeliverySuccess, - emitRateLimited, emitRetryWindowExhausted, + 
emitServerRateLimited, } from "services/delivery-metrics"; export function recordDeliveryAttempt( @@ -60,8 +61,12 @@ export function recordDeliveryRateLimited( targetId: string, correlationId?: string, ): void { - emitRateLimited(targetId); - logger.info("Rate limited (429)", { clientId, targetId, correlationId }); + emitServerRateLimited(targetId); + logger.info("Server rate limited (429)", { + clientId, + targetId, + correlationId, + }); } export function recordDeliveryFailure( @@ -118,7 +123,11 @@ export function recordAdmissionDenied( reason: string, correlationIds: (string | undefined)[], ): void { - emitAdmissionDenied(targetId, reason, correlationIds.length); + if (reason === "circuit_open") { + emitCircuitBlocked(targetId, correlationIds.length); + } else { + emitClientRateLimited(targetId, correlationIds.length); + } logger.warn("Admission denied", { clientId, targetId, diff --git a/lambdas/https-client-lambda/src/services/delivery/tls-agent-factory.ts b/lambdas/https-client-lambda/src/services/delivery/tls-agent-factory.ts index fb1ea136..bb1e7334 100644 --- a/lambdas/https-client-lambda/src/services/delivery/tls-agent-factory.ts +++ b/lambdas/https-client-lambda/src/services/delivery/tls-agent-factory.ts @@ -4,24 +4,14 @@ import { checkServerIdentity } from "node:tls"; import type { PeerCertificate } from "node:tls"; import forge from "node-forge"; import { GetObjectCommand, S3Client } from "@aws-sdk/client-s3"; -import { - GetSecretValueCommand, - SecretsManagerClient, -} from "@aws-sdk/client-secrets-manager"; import type { CallbackTarget } from "@nhs-notify-client-callbacks/models"; import { logger } from "@nhs-notify-client-callbacks/logger"; -const { - MTLS_CERT_SECRET_ARN, - MTLS_TEST_CA_S3_KEY, - MTLS_TEST_CERT_S3_BUCKET, - MTLS_TEST_CERT_S3_KEY, -} = process.env; +const { MTLS_CA_S3_KEY, MTLS_CERT_S3_BUCKET, MTLS_CERT_S3_KEY } = process.env; const CERT_EXPIRY_THRESHOLD_MS = Number(process.env.CERT_EXPIRY_THRESHOLD_MS) || 86_400_000; // 24 hours 
const s3Client = new S3Client({}); -const secretsClient = new SecretsManagerClient({}); export const PERMANENT_TLS_ERROR_CODES = new Set([ "CERT_HAS_EXPIRED", @@ -41,25 +31,6 @@ type CertMaterial = { let cachedMaterial: CertMaterial | undefined; -async function loadFromSecretsManager(): Promise<{ - key: string; - cert: string; -}> { - const response = await secretsClient.send( - new GetSecretValueCommand({ SecretId: MTLS_CERT_SECRET_ARN }), - ); - - if (!response.SecretString) { - throw new Error("mTLS cert secret has no value"); - } - - const parsed = JSON.parse(response.SecretString) as { - key: string; - cert: string; - }; - return { key: parsed.key, cert: parsed.cert }; -} - async function loadS3Object(bucket: string, key: string): Promise { const response = await s3Client.send( new GetObjectCommand({ Bucket: bucket, Key: key }), @@ -77,16 +48,11 @@ async function loadFromS3(): Promise<{ cert: string; ca?: string; }> { - if (!MTLS_TEST_CERT_S3_BUCKET || !MTLS_TEST_CERT_S3_KEY) { - throw new Error( - "MTLS_TEST_CERT_S3_BUCKET and MTLS_TEST_CERT_S3_KEY are required in non-production", - ); + if (!MTLS_CERT_S3_BUCKET || !MTLS_CERT_S3_KEY) { + throw new Error("MTLS_CERT_S3_BUCKET and MTLS_CERT_S3_KEY are required"); } - const pem = await loadS3Object( - MTLS_TEST_CERT_S3_BUCKET, - MTLS_TEST_CERT_S3_KEY, - ); + const pem = await loadS3Object(MTLS_CERT_S3_BUCKET, MTLS_CERT_S3_KEY); const pemObjects = forge.pem.decode(pem); const keyObj = pemObjects.find((obj) => obj.type.includes("PRIVATE KEY")); @@ -95,31 +61,28 @@ async function loadFromS3(): Promise<{ const cert = certObj ? 
forge.pem.encode(certObj) : ""; let ca: string | undefined; - if (MTLS_TEST_CA_S3_KEY) { - ca = await loadS3Object(MTLS_TEST_CERT_S3_BUCKET, MTLS_TEST_CA_S3_KEY); + if (MTLS_CA_S3_KEY) { + ca = await loadS3Object(MTLS_CERT_S3_BUCKET, MTLS_CA_S3_KEY); } return { key, cert, ca }; } async function loadCertMaterial(): Promise { - const isProduction = Boolean(MTLS_CERT_SECRET_ARN); - const raw = isProduction - ? await loadFromSecretsManager() - : await loadFromS3(); + const raw = await loadFromS3(); const x509 = new X509Certificate(raw.cert); const validTo = new Date(x509.validTo); logger.info("mTLS certificate loaded", { - source: isProduction ? "SecretsManager" : "S3", + source: "S3", validTo: validTo.toISOString(), }); return { key: raw.key, cert: raw.cert, - ca: "ca" in raw ? (raw.ca as string | undefined) : undefined, + ca: raw.ca, validTo, }; } @@ -150,10 +113,9 @@ export async function buildAgent(target: CallbackTarget): Promise { ); } - // Always load the CA in test environments (MTLS_TEST_CA_S3_KEY set) so that - // targets with mtls.enabled: false can still verify the server's cert chain. - // In production the CA comes from SecretsManager only when mTLS is in use. - if (target.delivery?.mtls?.enabled || MTLS_TEST_CA_S3_KEY) { + // Load CA from S3 when configured so targets with mtls.enabled: false can + // still verify the server's cert chain. 
+ if (target.delivery?.mtls?.enabled || MTLS_CA_S3_KEY) { const material = await getMaterial(); if (material.ca) { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index f2b2aa3a..12926732 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -52,9 +52,6 @@ catalogs: '@aws-sdk/client-s3': specifier: ^3.1024.0 version: 3.1029.0 - '@aws-sdk/client-secrets-manager': - specifier: ^3.1023.0 - version: 3.1029.0 '@aws-sdk/client-sqs': specifier: ^3.1023.0 version: 3.1026.0 @@ -340,9 +337,6 @@ importers: '@aws-sdk/client-s3': specifier: catalog:aws version: 3.1029.0 - '@aws-sdk/client-secrets-manager': - specifier: catalog:aws - version: 3.1029.0 '@aws-sdk/client-sqs': specifier: catalog:aws version: 3.1026.0 @@ -785,10 +779,6 @@ packages: resolution: {integrity: sha512-OuA8RZTxsAaHDcI25j2NGLMaYFI2WpJdDzK3uLmVBmaHwjQKQZOUDVVBcln8pNo3IgkY+HRSJhRR4/xlM//UyQ==} engines: {node: '>=20.0.0'} - '@aws-sdk/client-secrets-manager@3.1029.0': - resolution: {integrity: sha512-OtNiJSEXA8+KkFA1aS24BOFkJoRlxwJ8tBLiUUYKVwLu8L3Smfz2oj4BJwRlv0FzWTqrmJkFC8kly/cAZqU2UQ==} - engines: {node: '>=20.0.0'} - '@aws-sdk/client-sqs@3.1026.0': resolution: {integrity: sha512-b7z2WI1tqObk4U7vUbmBfXIeFhxKbFr7xQ4rWi879iFl5aSPvpd1WAmLi6z1boVKTEwEqHALuE5MyGBHhOCy5A==} engines: {node: '>=20.0.0'} @@ -4916,50 +4906,6 @@ snapshots: transitivePeerDependencies: - aws-crt - '@aws-sdk/client-secrets-manager@3.1029.0': - dependencies: - '@aws-crypto/sha256-browser': 5.2.0 - '@aws-crypto/sha256-js': 5.2.0 - '@aws-sdk/core': 3.973.27 - '@aws-sdk/credential-provider-node': 3.972.30 - '@aws-sdk/middleware-host-header': 3.972.9 - '@aws-sdk/middleware-logger': 3.972.9 - '@aws-sdk/middleware-recursion-detection': 3.972.10 - '@aws-sdk/middleware-user-agent': 3.972.29 - '@aws-sdk/region-config-resolver': 3.972.11 - '@aws-sdk/types': 3.973.7 - '@aws-sdk/util-endpoints': 3.996.6 - '@aws-sdk/util-user-agent-browser': 3.972.9 - '@aws-sdk/util-user-agent-node': 3.973.15 - '@smithy/config-resolver': 4.4.14 - '@smithy/core': 3.23.14 - 
'@smithy/fetch-http-handler': 5.3.16 - '@smithy/hash-node': 4.2.13 - '@smithy/invalid-dependency': 4.2.13 - '@smithy/middleware-content-length': 4.2.13 - '@smithy/middleware-endpoint': 4.4.29 - '@smithy/middleware-retry': 4.5.0 - '@smithy/middleware-serde': 4.2.17 - '@smithy/middleware-stack': 4.2.13 - '@smithy/node-config-provider': 4.3.13 - '@smithy/node-http-handler': 4.5.2 - '@smithy/protocol-http': 5.3.13 - '@smithy/smithy-client': 4.12.9 - '@smithy/types': 4.14.0 - '@smithy/url-parser': 4.2.13 - '@smithy/util-base64': 4.3.2 - '@smithy/util-body-length-browser': 4.2.2 - '@smithy/util-body-length-node': 4.2.3 - '@smithy/util-defaults-mode-browser': 4.3.45 - '@smithy/util-defaults-mode-node': 4.2.49 - '@smithy/util-endpoints': 3.3.4 - '@smithy/util-middleware': 4.2.13 - '@smithy/util-retry': 4.3.0 - '@smithy/util-utf8': 4.2.2 - tslib: 2.8.1 - transitivePeerDependencies: - - aws-crt - '@aws-sdk/client-sqs@3.1026.0': dependencies: '@aws-crypto/sha256-browser': 5.2.0 diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index c343e4f9..3e9e890d 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -30,7 +30,6 @@ catalogs: "@aws-sdk/client-cloudwatch": "^3.1025.0" "@aws-sdk/client-cloudwatch-logs": "^3.1023.0" "@aws-sdk/client-s3": "^3.1024.0" - "@aws-sdk/client-secrets-manager": "^3.1023.0" "@aws-sdk/client-sqs": "^3.1023.0" "@aws-sdk/client-ssm": "^3.1025.0" "@aws-crypto/sha256-js": "^5.2.0" diff --git a/src/models/src/__tests__/client-config-schema.test.ts b/src/models/src/__tests__/client-config-schema.test.ts index fa90a061..f1ff0702 100644 --- a/src/models/src/__tests__/client-config-schema.test.ts +++ b/src/models/src/__tests__/client-config-schema.test.ts @@ -259,13 +259,6 @@ describe("parseClientSubscriptionConfiguration", () => { expect(result.success).toBe(false); }); - it("accepts maxRetryDurationSeconds below 60", () => { - const config = createValidConfig(); - config.targets[0].delivery = { maxRetryDurationSeconds: 10 }; - - 
expect(parseClientSubscriptionConfiguration(config).success).toBe(true); - }); - it("accepts maxRetryDurationSeconds at boundary value 1", () => { const config = createValidConfig(); config.targets[0].delivery = { maxRetryDurationSeconds: 1 }; From 8de42744d5173cb2f0de376af8c5534b4729afdd Mon Sep 17 00:00:00 2001 From: Tim Marston Date: Thu, 30 Apr 2026 11:32:25 +0100 Subject: [PATCH 32/65] consistency and naming changes --- .../src/__tests__/endpoint-gate.test.ts | 18 +++---- .../src/__tests__/handler.test.ts | 8 +-- .../src/__tests__/record-result-lua.test.ts | 40 +++++++-------- lambdas/https-client-lambda/src/handler.ts | 4 +- .../src/services/admit.lua | 20 +++++++- .../src/services/endpoint-gate.ts | 8 +-- .../src/services/record-result.lua | 49 +++++++++---------- 7 files changed, 81 insertions(+), 66 deletions(-) diff --git a/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts b/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts index 57573a3d..dc539a0d 100644 --- a/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts @@ -208,13 +208,13 @@ describe("recordResult", () => { defaultConfig, ); - expect(result).toEqual({ circuitState: "closed", stateChanged: false }); + expect(result).toEqual({ circuitState: "closed", circuitSwitched: false }); expect(mockSendCommand).toHaveBeenCalledWith( expect.arrayContaining(["EVALSHA"]), ); }); - it("returns open with stateChanged when failure crosses threshold", async () => { + it("returns open with circuitSwitched when failure crosses threshold", async () => { mockSendCommand.mockResolvedValueOnce(["open", 1]); const result = await recordResult( @@ -225,10 +225,10 @@ describe("recordResult", () => { defaultConfig, ); - expect(result).toEqual({ circuitState: "open", stateChanged: true }); + expect(result).toEqual({ circuitState: "open", circuitSwitched: true }); }); - it("returns closed_recovery with stateChanged when 
circuit closes", async () => { + it("returns closed_recovery with circuitSwitched when circuit closes", async () => { mockSendCommand.mockResolvedValueOnce(["closed_recovery", 1]); const result = await recordResult( @@ -241,12 +241,12 @@ describe("recordResult", () => { expect(result).toEqual({ circuitState: "closed_recovery", - stateChanged: true, + circuitSwitched: true, }); }); - it("returns half_open without stateChanged when probing", async () => { - mockSendCommand.mockResolvedValueOnce(["half_open", 0]); + it("returns open_half without circuitSwitched when probing", async () => { + mockSendCommand.mockResolvedValueOnce(["open_half", 0]); const result = await recordResult( mockRedis, @@ -256,7 +256,7 @@ describe("recordResult", () => { defaultConfig, ); - expect(result).toEqual({ circuitState: "half_open", stateChanged: false }); + expect(result).toEqual({ circuitState: "open_half", circuitSwitched: false }); }); it("falls back to EVAL on NOSCRIPT error", async () => { @@ -272,7 +272,7 @@ describe("recordResult", () => { defaultConfig, ); - expect(result).toEqual({ circuitState: "closed", stateChanged: false }); + expect(result).toEqual({ circuitState: "closed", circuitSwitched: false }); expect(mockSendCommand).toHaveBeenCalledTimes(2); }); diff --git a/lambdas/https-client-lambda/src/__tests__/handler.test.ts b/lambdas/https-client-lambda/src/__tests__/handler.test.ts index 5e4dade3..0f4581aa 100644 --- a/lambdas/https-client-lambda/src/__tests__/handler.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/handler.test.ts @@ -563,7 +563,7 @@ describe("processRecords", () => { }); mockRecordResult.mockResolvedValue({ circuitState: "open", - stateChanged: true, + circuitSwitched: true, }); const { recordCircuitBreakerOpen } = jest.requireMock( @@ -587,7 +587,7 @@ describe("processRecords", () => { }); mockRecordResult.mockResolvedValue({ circuitState: "open", - stateChanged: false, + circuitSwitched: false, }); const { recordCircuitBreakerOpen } = 
jest.requireMock( @@ -611,7 +611,7 @@ describe("processRecords", () => { }); mockRecordResult.mockResolvedValue({ circuitState: "closed", - stateChanged: false, + circuitSwitched: false, }); const { recordCircuitBreakerOpen } = jest.requireMock( @@ -635,7 +635,7 @@ describe("processRecords", () => { }); mockRecordResult.mockResolvedValue({ circuitState: "closed_recovery", - stateChanged: true, + circuitSwitched: true, }); const { recordCircuitBreakerClosed } = jest.requireMock( diff --git a/lambdas/https-client-lambda/src/__tests__/record-result-lua.test.ts b/lambdas/https-client-lambda/src/__tests__/record-result-lua.test.ts index 00e04707..cde2a653 100644 --- a/lambdas/https-client-lambda/src/__tests__/record-result-lua.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/record-result-lua.test.ts @@ -3,9 +3,9 @@ import { createRedisStore, evalLua } from "__tests__/helpers/lua-redis-mock"; // ARGV: [now, consumedTokens, processingFailures, cooldownPeriodMs, recoveryPeriodMs, failureThreshold, minAttempts, samplePeriodMs] // KEYS: [epKey] -// Returns: [circuitState, stateChanged] -// circuitState: "open" | "half_open" | "closed_recovery" | "closed" -// stateChanged: 0 | 1 +// Returns: [circuitState, circuitSwitched] +// circuitState: "open" | "open_half" | "closed_recovery" | "closed" +// circuitSwitched: 0 | 1 type RecordResultArgs = { now: number; @@ -67,13 +67,13 @@ describe("record-result.lua", () => { ]), ); - const [circuitState, stateChanged] = runRecordResult(store, { + const [circuitState, circuitSwitched] = runRecordResult(store, { consumedTokens: 5, processingFailures: 0, }); expect(circuitState).toBe("closed"); - expect(stateChanged).toBe(0); + expect(circuitSwitched).toBe(0); }); it("increments cur_attempts without incrementing cur_failures", () => { @@ -125,13 +125,13 @@ describe("record-result.lua", () => { ]), ); - const [circuitState, stateChanged] = runRecordResult(store, { + const [circuitState, circuitSwitched] = runRecordResult(store, { 
consumedTokens: 1, processingFailures: 1, }); expect(circuitState).toBe("closed"); - expect(stateChanged).toBe(0); + expect(circuitSwitched).toBe(0); }); }); @@ -178,7 +178,7 @@ describe("record-result.lua", () => { ]), ); - const [circuitState, stateChanged] = runRecordResult(store, { + const [circuitState, circuitSwitched] = runRecordResult(store, { now, cooldownPeriodMs: 120_000, consumedTokens: 1, @@ -186,7 +186,7 @@ describe("record-result.lua", () => { }); expect(circuitState).toBe("open"); - expect(stateChanged).toBe(0); + expect(circuitSwitched).toBe(0); }); }); @@ -202,14 +202,14 @@ describe("record-result.lua", () => { ]), ); - const [circuitState, stateChanged] = runRecordResult(store, { + const [circuitState, circuitSwitched] = runRecordResult(store, { consumedTokens: 5, processingFailures: 5, minAttempts: 5, failureThreshold: 0.3, }); expect(circuitState).toBe("open"); - expect(stateChanged).toBe(1); + expect(circuitSwitched).toBe(1); }); it("does not open circuit when below minimum attempts", () => { @@ -223,14 +223,14 @@ describe("record-result.lua", () => { ]), ); - const [circuitState, stateChanged] = runRecordResult(store, { + const [circuitState, circuitSwitched] = runRecordResult(store, { consumedTokens: 3, processingFailures: 3, minAttempts: 5, failureThreshold: 0.3, }); expect(circuitState).toBe("closed"); - expect(stateChanged).toBe(0); + expect(circuitSwitched).toBe(0); }); it("sets is_open and switched_at on open", () => { @@ -304,7 +304,7 @@ describe("record-result.lua", () => { ]), ); - const [circuitState, stateChanged] = runRecordResult(store, { + const [circuitState, circuitSwitched] = runRecordResult(store, { now, cooldownPeriodMs: 120_000, consumedTokens: 1, @@ -312,7 +312,7 @@ describe("record-result.lua", () => { }); expect(circuitState).toBe("closed_recovery"); - expect(stateChanged).toBe(1); + expect(circuitSwitched).toBe(1); const epHash = store.get("ep:t1")!; expect(epHash.get("is_open")).toBe("0"); @@ -333,15 +333,15 @@ 
describe("record-result.lua", () => { ]), ); - const [circuitState, stateChanged] = runRecordResult(store, { + const [circuitState, circuitSwitched] = runRecordResult(store, { now, cooldownPeriodMs: 120_000, consumedTokens: 1, processingFailures: 1, }); - expect(circuitState).toBe("half_open"); - expect(stateChanged).toBe(0); + expect(circuitState).toBe("open_half"); + expect(circuitSwitched).toBe(0); }); }); @@ -423,7 +423,7 @@ describe("record-result.lua", () => { // interpolated attempts = 10 * 1.0 + 5 = 15 (>= minAttempts 5) // interpolated failures = 10 * 1.0 + 5 = 15 // failure rate = 15/15 = 1.0 > 0.3 → opens - const [circuitState, stateChanged] = runRecordResult(store, { + const [circuitState, circuitSwitched] = runRecordResult(store, { now, samplePeriodMs, consumedTokens: 5, @@ -432,7 +432,7 @@ describe("record-result.lua", () => { failureThreshold: 0.3, }); expect(circuitState).toBe("open"); - expect(stateChanged).toBe(1); + expect(circuitSwitched).toBe(1); }); }); diff --git a/lambdas/https-client-lambda/src/handler.ts b/lambdas/https-client-lambda/src/handler.ts index 395815ad..e9e1415d 100644 --- a/lambdas/https-client-lambda/src/handler.ts +++ b/lambdas/https-client-lambda/src/handler.ts @@ -325,11 +325,11 @@ async function processTargetBatch( processingFailures, gateConfig, ); - if (cbOutcome.stateChanged && cbOutcome.circuitState === "open") { + if (cbOutcome.circuitSwitched && cbOutcome.circuitState === "open") { recordCircuitBreakerOpen(batch.targetId); } if ( - cbOutcome.stateChanged && + cbOutcome.circuitSwitched && cbOutcome.circuitState === "closed_recovery" ) { recordCircuitBreakerClosed(batch.targetId); diff --git a/lambdas/https-client-lambda/src/services/admit.lua b/lambdas/https-client-lambda/src/services/admit.lua index ed15a928..3c47f504 100644 --- a/lambdas/https-client-lambda/src/services/admit.lua +++ b/lambdas/https-client-lambda/src/services/admit.lua @@ -11,6 +11,22 @@ -- Normal (closed): full configured rate -- -- Returns: { 
consumedTokens, reason, retryAfterMs, effectiveRate } +-- +-- consumedTokens: how many tokens were consumed for this batch +-- +-- reason: the reason for admission/non-admission, one of: +-- "rate_limited" — no tokens available for batch, no admission +-- "circuit_open" — circuit is open, no admission +-- "some_allowed" — 1 or more tokens consumed, some admission +-- +-- retryAfterMs: for any not admitted, how long to wait before retrying +-- +-- effectiveRate: the effective rate (tokens/s) applied to this batch + +-- Reason constants +local RATE_LIMITED = "rate_limited" +local CIRCUIT_OPEN = "circuit_open" +local SOME_ALLOWED = "some_allowed" -- Keys local epKey = KEYS[1] -- ep:{targetId} combined endpoint state hash @@ -56,7 +72,7 @@ if isOpen then if isHalfOpen then effectiveRate = probeRateLimit else - return { 0, "circuit_open", (switchedAt + cooldownMs) - now, 0 } + return { 0, CIRCUIT_OPEN, (switchedAt + cooldownMs) - now, 0 } end else if isRecovering then @@ -109,6 +125,6 @@ redis.call("HSET", epKey, "bucket_refilled_at", bucketRefilledAt ) -local reason = consumedTokens < 1 and "rate_limited" or "some_allowed" +local reason = consumedTokens < 1 and RATE_LIMITED or SOME_ALLOWED local retryAfter = consumedTokens < 1 and 1000 or 0 return { consumedTokens, reason, retryAfter, effectiveRate } diff --git a/lambdas/https-client-lambda/src/services/endpoint-gate.ts b/lambdas/https-client-lambda/src/services/endpoint-gate.ts index b96c00eb..b364c1d3 100644 --- a/lambdas/https-client-lambda/src/services/endpoint-gate.ts +++ b/lambdas/https-client-lambda/src/services/endpoint-gate.ts @@ -18,11 +18,11 @@ export type AdmitResultDenied = { export type AdmitResult = AdmitResultAllowed | AdmitResultDenied; -export type CircuitState = "open" | "half_open" | "closed_recovery" | "closed"; +export type CircuitState = "open" | "open_half" | "closed_recovery" | "closed"; export type RecordResultOutcome = { circuitState: CircuitState; - stateChanged: boolean; + circuitSwitched: 
boolean; }; export type EndpointGateConfig = { @@ -159,11 +159,11 @@ export async function recordResult( args, )) as [string, number]; - const [circuitState, stateChanged] = raw; + const [circuitState, circuitSwitched] = raw; return { circuitState: circuitState as CircuitState, - stateChanged: stateChanged === 1, + circuitSwitched: circuitSwitched === 1, }; } diff --git a/lambdas/https-client-lambda/src/services/record-result.lua b/lambdas/https-client-lambda/src/services/record-result.lua index fa42efea..1ebb0d36 100644 --- a/lambdas/https-client-lambda/src/services/record-result.lua +++ b/lambdas/https-client-lambda/src/services/record-result.lua @@ -7,23 +7,23 @@ -- 4. Checks whether to close the circuit (half-open + successes) -- 5. Checks whether to open the circuit (closed + threshold exceeded) -- --- Returns: { circuitState, stateChanged } +-- Returns: { circuitState, curcuitSwitched } -- -- circuitState: the current state of the circuit after this run -- "open" — fully open (during cooldown, no probes) --- "half_open" — open but past cooldown (probing) +-- "open_half" — open but past cooldown (probing) -- "closed_recovery" — closed but ramping up (recovery period) -- "closed" — closed, running at full rate -- --- stateChanged: whether a circuit transition occurred this run +-- curcuitSwitched: whether the circuit opened or closed during this run -- 1 — the circuit opened or closed during this execution -- 0 — no state transition -- Circuit state constants -local OPEN = "open" -local HALF_OPEN = "half_open" -local CLOSED_RECOVERY = "closed_recovery" -local CLOSED = "closed" +local OPEN = "open" +local OPEN_HALF = "open_half" +local CLOSED_RECOVERY = "closed_recovery" +local CLOSED = "closed" -- Keys local epKey = KEYS[1] -- ep:{targetId} combined endpoint state hash @@ -46,9 +46,8 @@ local state = redis.call("HMGET", epKey, "is_open", "switched_at", "cur_attempts", "prev_attempts", "cur_failures", "prev_failures", "sample_till") -local isOpenRaw = state[1] 
-local needInit = isOpenRaw == false or isOpenRaw == nil -local isOpen = needInit or tonumber(isOpenRaw) == 1 +local needInit = state[1] == false or state[1] == nil +local isOpen = needInit or tonumber(state[1]) == 1 local switchedAt = needInit and 0 or tonumber(state[2] or "0") local curAttempts = tonumber(state[3] or "0") local prevAttempts = tonumber(state[4] or "0") @@ -105,27 +104,27 @@ local failures = prevFailures * weight + curFailures -------------------------------------------------------------------------------- local processingSuccesses = consumedTokens - processingFailures -local stateChanged = false +local circuitSwitched = false -- Close circuit when half-open and there are successes if isHalfOpen and processingSuccesses > 0 then - isOpen = false - switchedAt = now - stateChanged = true + isOpen = false + switchedAt = now + circuitSwitched = true -- fall through, allow circuit to immediately re-open end -- Open circuit when closed, enough samples, and threshold exceeded local hasSampledEnough = attempts >= minAttempts if not isOpen and hasSampledEnough and (failures / attempts) > failureThreshold then - isOpen = true - switchedAt = now - curAttempts = 0 - curFailures = 0 - prevAttempts = 0 - prevFailures = 0 - sampleTill = now + samplePeriodMs - stateChanged = true + isOpen = true + switchedAt = now + curAttempts = 0 + curFailures = 0 + prevAttempts = 0 + prevFailures = 0 + sampleTill = now + samplePeriodMs + circuitSwitched = true end -------------------------------------------------------------------------------- @@ -135,7 +134,7 @@ end local circuitState if isOpen then if now > switchedAt + cooldownPeriodMs then - circuitState = HALF_OPEN + circuitState = OPEN_HALF else circuitState = OPEN end @@ -159,11 +158,11 @@ redis.call("HSET", epKey, "sample_till", sampleTill ) -if stateChanged then +if circuitSwitched then redis.call("HSET", epKey, "is_open", isOpen and 1 or 0, "switched_at", switchedAt ) end -return { circuitState, stateChanged and 1 or 0 
} +return { circuitState, circuitSwitched and 1 or 0 } From e78cda13e076713cbe0ac127c511a5951647abba Mon Sep 17 00:00:00 2001 From: Rhys Cox Date: Fri, 1 May 2026 08:22:02 +0100 Subject: [PATCH 33/65] CCM-16073 - Addressed PR feedback --- .../__tests__/delivery-observability.test.ts | 6 ++---- .../src/__tests__/endpoint-gate.test.ts | 5 ++++- .../src/__tests__/handler.test.ts | 15 +++++++++++---- lambdas/https-client-lambda/src/handler.ts | 4 +++- .../src/services/delivery-observability.ts | 19 ++++++++++++------- 5 files changed, 32 insertions(+), 17 deletions(-) diff --git a/lambdas/https-client-lambda/src/__tests__/delivery-observability.test.ts b/lambdas/https-client-lambda/src/__tests__/delivery-observability.test.ts index b1592a7b..651298c6 100644 --- a/lambdas/https-client-lambda/src/__tests__/delivery-observability.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/delivery-observability.test.ts @@ -235,11 +235,10 @@ describe("delivery-observability", () => { expect(emitClientRateLimited).toHaveBeenCalledWith("target-1", 2); expect(logger.warn).toHaveBeenCalledWith( - "Admission denied", + "Client rate limited", expect.objectContaining({ clientId: "client-1", targetId: "target-1", - reason: "rate_limited", deniedCount: 2, correlationIds: ["msg-a", "msg-b"], }), @@ -256,11 +255,10 @@ describe("delivery-observability", () => { expect(emitCircuitBlocked).toHaveBeenCalledWith("target-1", 1); expect(logger.warn).toHaveBeenCalledWith( - "Admission denied", + "Circuit blocked", expect.objectContaining({ clientId: "client-1", targetId: "target-1", - reason: "circuit_open", deniedCount: 1, correlationIds: ["msg-a"], }), diff --git a/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts b/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts index dc539a0d..4e697132 100644 --- a/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts @@ -256,7 +256,10 @@ 
describe("recordResult", () => { defaultConfig, ); - expect(result).toEqual({ circuitState: "open_half", circuitSwitched: false }); + expect(result).toEqual({ + circuitState: "open_half", + circuitSwitched: false, + }); }); it("falls back to EVAL on NOSCRIPT error", async () => { diff --git a/lambdas/https-client-lambda/src/__tests__/handler.test.ts b/lambdas/https-client-lambda/src/__tests__/handler.test.ts index 0f4581aa..14da4bd2 100644 --- a/lambdas/https-client-lambda/src/__tests__/handler.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/handler.test.ts @@ -299,13 +299,20 @@ describe("processRecords", () => { expect(mockChangeVisibility).toHaveBeenCalledTimes(1); }); - it("throws when CLIENT_ID is not set", async () => { + it("sends all records to DLQ when CLIENT_ID is not set", async () => { const saved = process.env.CLIENT_ID; delete process.env.CLIENT_ID; - await expect(processRecords([makeRecord()])).rejects.toThrow( - "CLIENT_ID is required", - ); + const record1 = makeRecord({ messageId: "msg-1" }); + const record2 = makeRecord({ messageId: "msg-2" }); + + const failures = await processRecords([record1, record2]); + + expect(failures).toEqual([]); + expect(mockSendToDlq).toHaveBeenCalledWith(record1.body); + expect(mockSendToDlq).toHaveBeenCalledWith(record2.body); + expect(mockSendToDlq).toHaveBeenCalledTimes(2); + expect(mockDeliverPayload).not.toHaveBeenCalled(); process.env.CLIENT_ID = saved; }); diff --git a/lambdas/https-client-lambda/src/handler.ts b/lambdas/https-client-lambda/src/handler.ts index e9e1415d..782777f2 100644 --- a/lambdas/https-client-lambda/src/handler.ts +++ b/lambdas/https-client-lambda/src/handler.ts @@ -361,7 +361,9 @@ export async function processRecords( ): Promise { const { CLIENT_ID } = process.env; if (!CLIENT_ID) { - throw new Error("CLIENT_ID is required"); + logger.error("CLIENT_ID is required — sending all records to DLQ"); + await Promise.all(records.map((record) => sendToDlq(record.body))); + return []; } 
resetMetrics(); diff --git a/lambdas/https-client-lambda/src/services/delivery-observability.ts b/lambdas/https-client-lambda/src/services/delivery-observability.ts index f61807b0..289ed3a9 100644 --- a/lambdas/https-client-lambda/src/services/delivery-observability.ts +++ b/lambdas/https-client-lambda/src/services/delivery-observability.ts @@ -125,16 +125,21 @@ export function recordAdmissionDenied( ): void { if (reason === "circuit_open") { emitCircuitBlocked(targetId, correlationIds.length); + logger.warn("Circuit blocked", { + clientId, + targetId, + deniedCount: correlationIds.length, + correlationIds, + }); } else { emitClientRateLimited(targetId, correlationIds.length); + logger.warn("Client rate limited", { + clientId, + targetId, + deniedCount: correlationIds.length, + correlationIds, + }); } - logger.warn("Admission denied", { - clientId, - targetId, - reason, - deniedCount: correlationIds.length, - correlationIds, - }); } export function recordDeliveryDuration( From 14550124c22f69b82424f6b4af52af329e479b95 Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Fri, 1 May 2026 10:36:31 +0100 Subject: [PATCH 34/65] Update IT test assertion following observability changes --- tests/integration/delivery-resilience.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/delivery-resilience.test.ts b/tests/integration/delivery-resilience.test.ts index e3b2218b..1fcbdd79 100644 --- a/tests/integration/delivery-resilience.test.ts +++ b/tests/integration/delivery-resilience.test.ts @@ -172,7 +172,7 @@ describe("Delivery Resilience", () => { const rateLimitedCount = await countLogEntries( ctx.cwLogs, httpsClientLogGroup, - `{ $.msg = "Admission denied" && $.reason = "rate_limited" }`, + `{ $.msg = "Client rate limited" }`, ctx.startTime, 1, ); From 070b3d7aef0170582678d034b8148564fad5e65d Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Fri, 1 May 2026 10:37:41 +0100 Subject: [PATCH 35/65] Fix initial state when circuit breaker enabled 
--- .../https-client-lambda/src/__tests__/admit-lua.test.ts | 8 ++++---- lambdas/https-client-lambda/src/services/admit.lua | 5 ++++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/lambdas/https-client-lambda/src/__tests__/admit-lua.test.ts b/lambdas/https-client-lambda/src/__tests__/admit-lua.test.ts index 7553a1b1..2d6b2d8e 100644 --- a/lambdas/https-client-lambda/src/__tests__/admit-lua.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/admit-lua.test.ts @@ -65,7 +65,7 @@ function runAdmit( describe("admit.lua", () => { describe("rate limiting", () => { - it("rate-limits on a fresh endpoint with no prior state", () => { + it("allows one initial probe token on a fresh endpoint with no prior state", () => { const store = createRedisStore(); const now = 1_000_000; @@ -74,12 +74,12 @@ describe("admit.lua", () => { targetRateLimit: 10, }); - expect(consumedTokens).toBe(0); - expect(reason).toBe("rate_limited"); + expect(consumedTokens).toBe(1); + expect(reason).toBe("some_allowed"); expect(effectiveRate).toBeCloseTo(1 / 60, 5); }); - it("generates a probe token on the second call after enough elapsed time", () => { + it("generates an additional probe token on a subsequent call after enough elapsed time", () => { const store = createRedisStore(); runAdmit(store, { now: 1_000_000, targetRateLimit: 10 }); diff --git a/lambdas/https-client-lambda/src/services/admit.lua b/lambdas/https-client-lambda/src/services/admit.lua index 3c47f504..ad084b68 100644 --- a/lambdas/https-client-lambda/src/services/admit.lua +++ b/lambdas/https-client-lambda/src/services/admit.lua @@ -29,7 +29,7 @@ local CIRCUIT_OPEN = "circuit_open" local SOME_ALLOWED = "some_allowed" -- Keys -local epKey = KEYS[1] -- ep:{targetId} combined endpoint state hash +local epKey = KEYS[1] -- ep:{targetId} combined endpoint state hash -- Arguments local now = tonumber(ARGV[1]) or 0 @@ -104,6 +104,9 @@ end if isOpen then bucketTokens = 0 + if rlNeedInit and isHalfOpen then + bucketTokens = 1 
+ end end local generatedTokens = math.floor((now - bucketRefilledAt) * effectiveRate / 1000) From c8207c00f95d8ecd6b26075c084edacaa68c5078 Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Fri, 1 May 2026 12:35:24 +0100 Subject: [PATCH 36/65] Fix debug int test script --- scripts/tests/integration-debug.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/tests/integration-debug.sh b/scripts/tests/integration-debug.sh index b6882528..d7892526 100755 --- a/scripts/tests/integration-debug.sh +++ b/scripts/tests/integration-debug.sh @@ -149,8 +149,6 @@ log_filter_args() { # CloudWatch filter patterns treat quoted strings as exact phrases. printf '%s\n' --filter-pattern "\"$escaped_log_filter\"" fi - - printf '%s\n' "${args[@]}" return 0 } From 624f706203aaef5c518dd4cd4410ca5372fd0ace Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Fri, 1 May 2026 14:18:15 +0100 Subject: [PATCH 37/65] Fix circuit breaker IT test assertion following observability changes --- tests/integration/delivery-resilience.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/delivery-resilience.test.ts b/tests/integration/delivery-resilience.test.ts index 1fcbdd79..0d6c083c 100644 --- a/tests/integration/delivery-resilience.test.ts +++ b/tests/integration/delivery-resilience.test.ts @@ -262,7 +262,7 @@ describe("Delivery Resilience", () => { const circuitOpenCount = await countLogEntries( ctx.cwLogs, httpsClientLogGroup, - `{ $.msg = "Admission denied" && $.reason = "circuit_open" }`, + `{ $.msg = "Circuit blocked" }`, ctx.startTime, 1, ); From 5a7a5a7ad77809788607feb49ec867b021c5ba3b Mon Sep 17 00:00:00 2001 From: Tim Marston Date: Tue, 5 May 2026 12:54:12 +0100 Subject: [PATCH 38/65] remove shim for migrated logger --- .../src/__tests__/index.test.ts | 6 +++--- .../src/__tests__/services/callback-logger.test.ts | 2 +- .../src/__tests__/services/config-loader.test.ts | 9 --------- .../services/filters/channel-status-filter.test.ts | 2 +- 
.../services/filters/message-status-filter.test.ts | 2 +- .../src/__tests__/services/subscription-filter.test.ts | 2 +- lambdas/client-transform-filter-lambda/src/handler.ts | 2 +- lambdas/client-transform-filter-lambda/src/index.ts | 2 +- .../src/services/callback-logger.ts | 2 +- .../src/services/config-loader.ts | 2 +- .../src/services/filters/channel-status-filter.ts | 2 +- .../src/services/filters/message-status-filter.ts | 2 +- .../src/services/logger.ts | 1 - .../src/services/observability.ts | 4 ++-- .../src/services/subscription-filter.ts | 2 +- .../src/services/validators/event-validator.ts | 2 +- 16 files changed, 17 insertions(+), 27 deletions(-) delete mode 100644 lambdas/client-transform-filter-lambda/src/services/logger.ts diff --git a/lambdas/client-transform-filter-lambda/src/__tests__/index.test.ts b/lambdas/client-transform-filter-lambda/src/__tests__/index.test.ts index 168d128d..d21cdf4e 100644 --- a/lambdas/client-transform-filter-lambda/src/__tests__/index.test.ts +++ b/lambdas/client-transform-filter-lambda/src/__tests__/index.test.ts @@ -7,7 +7,7 @@ import type { MessageStatusData, StatusPublishEvent, } from "@nhs-notify-client-callbacks/models"; -import type { Logger } from "services/logger"; +import type { Logger } from "@nhs-notify-client-callbacks/logger"; import type { CallbackMetrics } from "services/metrics"; import type { ConfigLoader } from "services/config-loader"; import { ObservabilityService } from "services/observability"; @@ -547,7 +547,7 @@ describe("createHandler default wiring", () => { CallbackMetrics: state.CallbackMetrics, })); - jest.doMock("services/logger", () => ({ + jest.doMock("@nhs-notify-client-callbacks/logger", () => ({ Logger: state.LoggerCtor, })); @@ -592,7 +592,7 @@ describe("createHandler default wiring", () => { expect(result).toEqual(["ok"]); jest.unmock("services/metrics"); - jest.unmock("services/logger"); + jest.unmock("@nhs-notify-client-callbacks/logger"); jest.unmock("services/observability"); 
jest.unmock("handler"); }); diff --git a/lambdas/client-transform-filter-lambda/src/__tests__/services/callback-logger.test.ts b/lambdas/client-transform-filter-lambda/src/__tests__/services/callback-logger.test.ts index 4d18ce42..d3c89858 100644 --- a/lambdas/client-transform-filter-lambda/src/__tests__/services/callback-logger.test.ts +++ b/lambdas/client-transform-filter-lambda/src/__tests__/services/callback-logger.test.ts @@ -2,7 +2,7 @@ import { logCallbackGenerated, logCallbackSigned, } from "services/callback-logger"; -import type { Logger } from "services/logger"; +import type { Logger } from "@nhs-notify-client-callbacks/logger"; import { type ClientCallbackPayload, EventTypes, diff --git a/lambdas/client-transform-filter-lambda/src/__tests__/services/config-loader.test.ts b/lambdas/client-transform-filter-lambda/src/__tests__/services/config-loader.test.ts index c9fecdce..e15e7ee6 100644 --- a/lambdas/client-transform-filter-lambda/src/__tests__/services/config-loader.test.ts +++ b/lambdas/client-transform-filter-lambda/src/__tests__/services/config-loader.test.ts @@ -4,15 +4,6 @@ import { ConfigSubscriptionCache } from "@nhs-notify-client-callbacks/config-sub import { ConfigLoader } from "services/config-loader"; import { ConfigValidationError } from "services/validators/config-validator"; -jest.mock("services/logger", () => ({ - logger: { - debug: jest.fn(), - info: jest.fn(), - warn: jest.fn(), - error: jest.fn(), - }, -})); - jest.mock("@nhs-notify-client-callbacks/logger", () => ({ logger: { debug: jest.fn(), diff --git a/lambdas/client-transform-filter-lambda/src/__tests__/services/filters/channel-status-filter.test.ts b/lambdas/client-transform-filter-lambda/src/__tests__/services/filters/channel-status-filter.test.ts index e04eaf85..259d06ff 100644 --- a/lambdas/client-transform-filter-lambda/src/__tests__/services/filters/channel-status-filter.test.ts +++ 
b/lambdas/client-transform-filter-lambda/src/__tests__/services/filters/channel-status-filter.test.ts @@ -6,7 +6,7 @@ import { EventTypes } from "@nhs-notify-client-callbacks/models"; import { createChannelStatusConfig } from "__tests__/helpers/client-subscription-fixtures"; import { matchesChannelStatusSubscription } from "services/filters/channel-status-filter"; -jest.mock("services/logger", () => ({ +jest.mock("@nhs-notify-client-callbacks/logger", () => ({ logger: { debug: jest.fn(), info: jest.fn(), diff --git a/lambdas/client-transform-filter-lambda/src/__tests__/services/filters/message-status-filter.test.ts b/lambdas/client-transform-filter-lambda/src/__tests__/services/filters/message-status-filter.test.ts index ca9bd416..418c790e 100644 --- a/lambdas/client-transform-filter-lambda/src/__tests__/services/filters/message-status-filter.test.ts +++ b/lambdas/client-transform-filter-lambda/src/__tests__/services/filters/message-status-filter.test.ts @@ -6,7 +6,7 @@ import { EventTypes } from "@nhs-notify-client-callbacks/models"; import { createMessageStatusConfig } from "__tests__/helpers/client-subscription-fixtures"; import { matchesMessageStatusSubscription } from "services/filters/message-status-filter"; -jest.mock("services/logger", () => ({ +jest.mock("@nhs-notify-client-callbacks/logger", () => ({ logger: { debug: jest.fn(), info: jest.fn(), diff --git a/lambdas/client-transform-filter-lambda/src/__tests__/services/subscription-filter.test.ts b/lambdas/client-transform-filter-lambda/src/__tests__/services/subscription-filter.test.ts index 153ab934..b9ece544 100644 --- a/lambdas/client-transform-filter-lambda/src/__tests__/services/subscription-filter.test.ts +++ b/lambdas/client-transform-filter-lambda/src/__tests__/services/subscription-filter.test.ts @@ -18,7 +18,7 @@ import { import { TransformationError } from "services/error-handler"; import { evaluateSubscriptionFilters } from "services/subscription-filter"; -jest.mock("services/logger", () => ({ 
+jest.mock("@nhs-notify-client-callbacks/logger", () => ({ logger: { debug: jest.fn(), info: jest.fn(), diff --git a/lambdas/client-transform-filter-lambda/src/handler.ts b/lambdas/client-transform-filter-lambda/src/handler.ts index be05991c..f1f60c84 100644 --- a/lambdas/client-transform-filter-lambda/src/handler.ts +++ b/lambdas/client-transform-filter-lambda/src/handler.ts @@ -7,7 +7,7 @@ import type { } from "@nhs-notify-client-callbacks/models"; import { validateStatusPublishEvent } from "services/validators/event-validator"; import { transformEvent } from "services/transformers/event-transformer"; -import { extractCorrelationId } from "services/logger"; +import { extractCorrelationId } from "@nhs-notify-client-callbacks/logger"; import { ValidationError, getEventError } from "services/error-handler"; import type { ObservabilityService } from "services/observability"; import type { ConfigLoader } from "services/config-loader"; diff --git a/lambdas/client-transform-filter-lambda/src/index.ts b/lambdas/client-transform-filter-lambda/src/index.ts index 5ef8e197..4b3cdc9f 100644 --- a/lambdas/client-transform-filter-lambda/src/index.ts +++ b/lambdas/client-transform-filter-lambda/src/index.ts @@ -1,5 +1,5 @@ import type { SQSRecord } from "aws-lambda"; -import { Logger } from "services/logger"; +import { Logger } from "@nhs-notify-client-callbacks/logger"; import { CallbackMetrics, createMetricLogger } from "services/metrics"; import { ObservabilityService } from "services/observability"; import { ConfigLoaderService } from "services/config-loader-service"; diff --git a/lambdas/client-transform-filter-lambda/src/services/callback-logger.ts b/lambdas/client-transform-filter-lambda/src/services/callback-logger.ts index a3ac6a25..b177fcbf 100644 --- a/lambdas/client-transform-filter-lambda/src/services/callback-logger.ts +++ b/lambdas/client-transform-filter-lambda/src/services/callback-logger.ts @@ -4,7 +4,7 @@ import { EventTypes, type MessageStatusAttributes, } 
from "@nhs-notify-client-callbacks/models"; -import type { Logger } from "services/logger"; +import type { Logger } from "@nhs-notify-client-callbacks/logger"; function isMessageStatusAttributes( attributes: MessageStatusAttributes | ChannelStatusAttributes, diff --git a/lambdas/client-transform-filter-lambda/src/services/config-loader.ts b/lambdas/client-transform-filter-lambda/src/services/config-loader.ts index 0b272774..32a4e370 100644 --- a/lambdas/client-transform-filter-lambda/src/services/config-loader.ts +++ b/lambdas/client-transform-filter-lambda/src/services/config-loader.ts @@ -1,6 +1,6 @@ import type { ConfigSubscriptionCache } from "@nhs-notify-client-callbacks/config-subscription-cache"; import type { ClientSubscriptionConfiguration } from "@nhs-notify-client-callbacks/models"; -import { logger } from "services/logger"; +import { logger } from "@nhs-notify-client-callbacks/logger"; import { wrapUnknownError } from "services/error-handler"; import { ConfigValidationError } from "services/validators/config-validator"; diff --git a/lambdas/client-transform-filter-lambda/src/services/filters/channel-status-filter.ts b/lambdas/client-transform-filter-lambda/src/services/filters/channel-status-filter.ts index e43394bf..1a669281 100644 --- a/lambdas/client-transform-filter-lambda/src/services/filters/channel-status-filter.ts +++ b/lambdas/client-transform-filter-lambda/src/services/filters/channel-status-filter.ts @@ -4,7 +4,7 @@ import type { ClientSubscriptionConfiguration, StatusPublishEvent, } from "@nhs-notify-client-callbacks/models"; -import { logger } from "services/logger"; +import { logger } from "@nhs-notify-client-callbacks/logger"; const isChannelStatusSubscription = ( subscription: ClientSubscriptionConfiguration["subscriptions"][number], diff --git a/lambdas/client-transform-filter-lambda/src/services/filters/message-status-filter.ts b/lambdas/client-transform-filter-lambda/src/services/filters/message-status-filter.ts index 
01bead4f..c51f8c71 100644 --- a/lambdas/client-transform-filter-lambda/src/services/filters/message-status-filter.ts +++ b/lambdas/client-transform-filter-lambda/src/services/filters/message-status-filter.ts @@ -4,7 +4,7 @@ import type { MessageStatusSubscriptionConfiguration, StatusPublishEvent, } from "@nhs-notify-client-callbacks/models"; -import { logger } from "services/logger"; +import { logger } from "@nhs-notify-client-callbacks/logger"; const isMessageStatusSubscription = ( subscription: ClientSubscriptionConfiguration["subscriptions"][number], diff --git a/lambdas/client-transform-filter-lambda/src/services/logger.ts b/lambdas/client-transform-filter-lambda/src/services/logger.ts deleted file mode 100644 index 5c373b25..00000000 --- a/lambdas/client-transform-filter-lambda/src/services/logger.ts +++ /dev/null @@ -1 +0,0 @@ -export * from "@nhs-notify-client-callbacks/logger"; diff --git a/lambdas/client-transform-filter-lambda/src/services/observability.ts b/lambdas/client-transform-filter-lambda/src/services/observability.ts index efd55eea..e921db26 100644 --- a/lambdas/client-transform-filter-lambda/src/services/observability.ts +++ b/lambdas/client-transform-filter-lambda/src/services/observability.ts @@ -1,8 +1,8 @@ import type { MetricsLogger } from "aws-embedded-metrics"; import type { ClientCallbackPayload } from "@nhs-notify-client-callbacks/models"; import { logCallbackGenerated } from "services/callback-logger"; -import type { Logger } from "services/logger"; -import { logLifecycleEvent } from "services/logger"; +import type { Logger } from "@nhs-notify-client-callbacks/logger"; +import { logLifecycleEvent } from "@nhs-notify-client-callbacks/logger"; import type { CallbackMetrics } from "services/metrics"; export class ObservabilityService { diff --git a/lambdas/client-transform-filter-lambda/src/services/subscription-filter.ts b/lambdas/client-transform-filter-lambda/src/services/subscription-filter.ts index 2a51627f..dbca66ff 100644 --- 
a/lambdas/client-transform-filter-lambda/src/services/subscription-filter.ts +++ b/lambdas/client-transform-filter-lambda/src/services/subscription-filter.ts @@ -8,7 +8,7 @@ import { EventTypes } from "@nhs-notify-client-callbacks/models"; import { matchesChannelStatusSubscription } from "services/filters/channel-status-filter"; import { matchesMessageStatusSubscription } from "services/filters/message-status-filter"; import { TransformationError } from "services/error-handler"; -import { logger } from "services/logger"; +import { logger } from "@nhs-notify-client-callbacks/logger"; type FilterResult = { matched: boolean; diff --git a/lambdas/client-transform-filter-lambda/src/services/validators/event-validator.ts b/lambdas/client-transform-filter-lambda/src/services/validators/event-validator.ts index 03e37807..a726eac4 100644 --- a/lambdas/client-transform-filter-lambda/src/services/validators/event-validator.ts +++ b/lambdas/client-transform-filter-lambda/src/services/validators/event-validator.ts @@ -11,7 +11,7 @@ import { ValidationError, formatValidationIssuePath, } from "services/error-handler"; -import { extractCorrelationId } from "services/logger"; +import { extractCorrelationId } from "@nhs-notify-client-callbacks/logger"; const NHSNotifyExtensionsSchema = z.object({ traceparent: z.string().min(1), From 812d50edc4ea5a5549a10fa8f125b64caf9aaa89 Mon Sep 17 00:00:00 2001 From: Tim Marston Date: Tue, 5 May 2026 16:23:05 +0100 Subject: [PATCH 39/65] removed dead src/config-cache --- knip.ts | 3 - pnpm-lock.yaml | 28 ------- src/config-cache/jest.config.ts | 14 ---- src/config-cache/package.json | 32 -------- .../src/__tests__/config-cache.test.ts | 75 ------------------- src/config-cache/src/config-cache.ts | 33 -------- src/config-cache/src/index.ts | 1 - src/config-cache/tsconfig.json | 14 ---- 8 files changed, 200 deletions(-) delete mode 100644 src/config-cache/jest.config.ts delete mode 100644 src/config-cache/package.json delete mode 100644 
src/config-cache/src/__tests__/config-cache.test.ts delete mode 100644 src/config-cache/src/config-cache.ts delete mode 100644 src/config-cache/src/index.ts delete mode 100644 src/config-cache/tsconfig.json diff --git a/knip.ts b/knip.ts index f8612f70..fc3901d9 100644 --- a/knip.ts +++ b/knip.ts @@ -41,9 +41,6 @@ const config: KnipConfig = { "lambdas/perf-runner-lambda": { ignoreDependencies: ["@tsconfig/node22", "@types/aws-lambda"], }, - "src/config-cache": { - ignoreDependencies: ["@tsconfig/node22"], - }, "src/config-subscription-cache": { ignoreDependencies: ["@tsconfig/node22"], }, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 12926732..31de1237 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -497,34 +497,6 @@ importers: specifier: catalog:tools version: 5.9.3 - src/config-cache: - dependencies: - '@nhs-notify-client-callbacks/models': - specifier: workspace:* - version: link:../models - devDependencies: - '@tsconfig/node22': - specifier: catalog:tools - version: 22.0.5 - '@types/jest': - specifier: catalog:test - version: 30.0.0 - '@types/node': - specifier: catalog:tools - version: 25.6.0 - eslint: - specifier: catalog:lint - version: 9.39.4(jiti@2.6.1) - jest: - specifier: catalog:test - version: 30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)) - ts-jest: - specifier: catalog:test - version: 29.4.9(@babel/core@7.29.0)(@jest/transform@30.3.0)(@jest/types@30.3.0)(babel-jest@30.3.0(@babel/core@7.29.0))(esbuild@0.28.0)(jest-util@30.3.0)(jest@30.3.0(@types/node@25.6.0)(ts-node@10.9.2(@types/node@25.6.0)(typescript@5.9.3)))(typescript@5.9.3) - typescript: - specifier: catalog:tools - version: 5.9.3 - src/config-subscription-cache: dependencies: '@aws-sdk/client-s3': diff --git a/src/config-cache/jest.config.ts b/src/config-cache/jest.config.ts deleted file mode 100644 index 6ecf333b..00000000 --- a/src/config-cache/jest.config.ts +++ /dev/null @@ -1,14 +0,0 @@ -import { nodeJestConfig } from "../../jest.config.base.ts"; 
- -export default { - ...nodeJestConfig, - coverageThreshold: { - global: { - ...nodeJestConfig.coverageThreshold?.global, - branches: 100, - functions: 100, - lines: 100, - statements: 100, - }, - }, -}; diff --git a/src/config-cache/package.json b/src/config-cache/package.json deleted file mode 100644 index 61bf815f..00000000 --- a/src/config-cache/package.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "exports": { - ".": { - "types": "./src/index.ts", - "default": "./src/index.ts" - } - }, - "dependencies": { - "@nhs-notify-client-callbacks/models": "workspace:*" - }, - "devDependencies": { - "@tsconfig/node22": "catalog:tools", - "@types/jest": "catalog:test", - "@types/node": "catalog:tools", - "eslint": "catalog:lint", - "jest": "catalog:test", - "ts-jest": "catalog:test", - "typescript": "catalog:tools" - }, - "engines": { - "node": ">=24.14.1" - }, - "name": "@nhs-notify-client-callbacks/config-cache", - "private": true, - "scripts": { - "lint": "eslint .", - "lint:fix": "eslint . --fix", - "test:unit": "jest", - "typecheck": "tsc --noEmit" - }, - "version": "0.0.1" -} diff --git a/src/config-cache/src/__tests__/config-cache.test.ts b/src/config-cache/src/__tests__/config-cache.test.ts deleted file mode 100644 index 179a178a..00000000 --- a/src/config-cache/src/__tests__/config-cache.test.ts +++ /dev/null @@ -1,75 +0,0 @@ -import type { ClientSubscriptionConfiguration } from "@nhs-notify-client-callbacks/models"; -import { ConfigCache } from "config-cache"; - -const createConfig = (clientId: string): ClientSubscriptionConfiguration => ({ - clientId, - subscriptions: [], - targets: [], -}); - -describe("ConfigCache", () => { - it("stores and retrieves configuration", () => { - const cache = new ConfigCache(60_000); - const config = createConfig("client-1"); - - cache.set("client-1", config); - - expect(cache.get("client-1")).toEqual(config); - }); - - it("returns undefined for non-existent key", () => { - const cache = new ConfigCache(60_000); - - 
expect(cache.get("non-existent")).toBeUndefined(); - }); - - it("returns cached value without re-fetch when within TTL", () => { - jest.useFakeTimers(); - jest.setSystemTime(new Date("2026-01-01T10:00:00Z")); - - const cache = new ConfigCache(5000); - const config = createConfig("client-1"); - - cache.set("client-1", config); - - jest.advanceTimersByTime(4999); - - expect(cache.get("client-1")).toEqual(config); - - jest.useRealTimers(); - }); - - it("returns undefined for expired entries after TTL", () => { - jest.useFakeTimers(); - jest.setSystemTime(new Date("2026-01-01T10:00:00Z")); - - const cache = new ConfigCache(1000); - const config = createConfig("client-1"); - - cache.set("client-1", config); - expect(cache.get("client-1")).toEqual(config); - - jest.advanceTimersByTime(1001); - - expect(cache.get("client-1")).toBeUndefined(); - - jest.useRealTimers(); - }); - - it("clears all entries", () => { - const cache = new ConfigCache(60_000); - const configA = createConfig("client-a"); - const configB = createConfig("client-b"); - - cache.set("client-a", configA); - cache.set("client-b", configB); - - expect(cache.get("client-a")).toEqual(configA); - expect(cache.get("client-b")).toEqual(configB); - - cache.clear(); - - expect(cache.get("client-a")).toBeUndefined(); - expect(cache.get("client-b")).toBeUndefined(); - }); -}); diff --git a/src/config-cache/src/config-cache.ts b/src/config-cache/src/config-cache.ts deleted file mode 100644 index 641cc60c..00000000 --- a/src/config-cache/src/config-cache.ts +++ /dev/null @@ -1,33 +0,0 @@ -import type { ClientSubscriptionConfiguration } from "@nhs-notify-client-callbacks/models"; - -type CacheEntry = { - value: ClientSubscriptionConfiguration; - expiresAt: number; -}; - -export class ConfigCache { - private readonly cache = new Map(); - - constructor(private readonly ttlMs: number) {} - - get(clientId: string): ClientSubscriptionConfiguration | undefined { - const entry = this.cache.get(clientId); - - if (entry && 
entry.expiresAt <= Date.now()) { - this.cache.delete(clientId); - } - - return this.cache.get(clientId)?.value; - } - - set(clientId: string, value: ClientSubscriptionConfiguration): void { - this.cache.set(clientId, { - value, - expiresAt: Date.now() + this.ttlMs, - }); - } - - clear(): void { - this.cache.clear(); - } -} diff --git a/src/config-cache/src/index.ts b/src/config-cache/src/index.ts deleted file mode 100644 index 1da1a0f1..00000000 --- a/src/config-cache/src/index.ts +++ /dev/null @@ -1 +0,0 @@ -export { ConfigCache } from "./config-cache"; diff --git a/src/config-cache/tsconfig.json b/src/config-cache/tsconfig.json deleted file mode 100644 index a50e6fc0..00000000 --- a/src/config-cache/tsconfig.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "compilerOptions": { - "isolatedModules": true, - "paths": { - "*": [ - "./src/*" - ] - } - }, - "extends": "../../tsconfig.base.json", - "include": [ - "src/**/*" - ] -} From a8fcc47a1acb71b371a14148e15eb542a4be7a30 Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Tue, 5 May 2026 17:15:03 +0100 Subject: [PATCH 40/65] CCM-16073 - Performance test changes and concurrency optimisation (#173) - Performance test changes and throughput optimisation --------- Co-authored-by: rhyscoxnhs --- .../terraform/components/callbacks/README.md | 6 +- .../callbacks/module_client_delivery.tf | 13 +- .../callbacks/module_perf_runner_lambda.tf | 18 +- .../terraform/components/callbacks/pre.sh | 14 +- .../components/callbacks/variables.tf | 26 +- .../modules/client-delivery/README.md | 15 +- .../client-delivery/iam_role_sqs_target.tf | 2 +- .../module_https_client_lambda.tf | 23 +- .../modules/client-delivery/variables.tf | 32 ++- .../src/__tests__/handler.test.ts | 28 +- lambdas/https-client-lambda/src/handler.ts | 50 ++-- .../src/__tests__/index.test.ts | 48 ++++ lambdas/mock-webhook-lambda/src/index.ts | 251 ++++++++++++------ .../src/__tests__/event-factories.test.ts | 53 ++++ .../src/__tests__/index.test.ts | 94 +++++-- 
.../src/__tests__/purge.test.ts | 20 +- .../src/__tests__/runner.test.ts | 62 ++++- .../src/__tests__/sqs-stats.test.ts | 75 ++++++ .../perf-runner-lambda/src/event-factories.ts | 46 +++- lambdas/perf-runner-lambda/src/index.ts | 5 +- lambdas/perf-runner-lambda/src/purge.ts | 13 +- lambdas/perf-runner-lambda/src/runner.ts | 130 ++++++--- lambdas/perf-runner-lambda/src/scenario.ts | 82 ------ lambdas/perf-runner-lambda/src/sqs-stats.ts | 29 ++ lambdas/perf-runner-lambda/src/types.ts | 18 +- tests/integration/delivery-resilience.test.ts | 2 +- .../subscriptions/mock-client-rate-limit.json | 2 +- .../fixtures/subscriptions/perf-client-1.json | 2 +- .../fixtures/subscriptions/perf-client-2.json | 2 +- 29 files changed, 849 insertions(+), 312 deletions(-) create mode 100644 lambdas/perf-runner-lambda/src/__tests__/sqs-stats.test.ts delete mode 100644 lambdas/perf-runner-lambda/src/scenario.ts create mode 100644 lambdas/perf-runner-lambda/src/sqs-stats.ts diff --git a/infrastructure/terraform/components/callbacks/README.md b/infrastructure/terraform/components/callbacks/README.md index 9889ab22..ec0f13e8 100644 --- a/infrastructure/terraform/components/callbacks/README.md +++ b/infrastructure/terraform/components/callbacks/README.md @@ -17,8 +17,12 @@ |------|-------------|------|---------|:--------:| | [applications\_map\_parameter\_name](#input\_applications\_map\_parameter\_name) | SSM Parameter Store path for the clientId-to-applicationData map, where applicationData is currently only the applicationId | `string` | `null` | no | | [aws\_account\_id](#input\_aws\_account\_id) | The AWS Account ID (numeric) | `string` | n/a | yes | +| [cb\_cooldown\_period\_ms](#input\_cb\_cooldown\_period\_ms) | Full block duration after circuit opens, before half-open probes begin (ms) | `number` | `120000` | no | +| [cb\_recovery\_period\_ms](#input\_cb\_recovery\_period\_ms) | Linear ramp-up duration after circuit closes (ms) | `number` | `600000` | no | | 
[component](#input\_component) | The variable encapsulating the name of this component | `string` | `"callbacks"` | no | | [default\_tags](#input\_default\_tags) | A map of default tags to apply to all taggable resources within the component | `map(string)` | `{}` | no | +| [delivery\_lambda\_batch\_size](#input\_delivery\_lambda\_batch\_size) | Number of SQS messages per delivery Lambda invocation | `number` | `100` | no | +| [delivery\_lambda\_batching\_window\_sec](#input\_delivery\_lambda\_batching\_window\_sec) | Maximum time in seconds to wait for a full batch before invoking the delivery Lambda | `number` | `1` | no | | [deploy\_mock\_clients](#input\_deploy\_mock\_clients) | Flag to deploy mock webhook lambda for integration testing (test/dev environments only) | `bool` | `false` | no | | [deploy\_perf\_runner](#input\_deploy\_perf\_runner) | Flag to deploy the perf-runner lambda for performance testing (test/dev environments only) | `bool` | `false` | no | | [elasticache\_data\_storage\_maximum\_gb](#input\_elasticache\_data\_storage\_maximum\_gb) | Maximum data storage in GB for the ElastiCache Serverless delivery state cache | `number` | `1` | no | @@ -40,7 +44,7 @@ | [parent\_acct\_environment](#input\_parent\_acct\_environment) | Name of the environment responsible for the acct resources used, affects things like DNS zone. Useful for named dev environments | `string` | `"main"` | no | | [pipe\_event\_patterns](#input\_pipe\_event\_patterns) | value | `list(string)` | `[]` | no | | [pipe\_log\_level](#input\_pipe\_log\_level) | Log level for the EventBridge Pipe. 
| `string` | `"ERROR"` | no | -| [pipe\_sqs\_input\_batch\_size](#input\_pipe\_sqs\_input\_batch\_size) | n/a | `number` | `1` | no | +| [pipe\_sqs\_input\_batch\_size](#input\_pipe\_sqs\_input\_batch\_size) | n/a | `number` | `10` | no | | [pipe\_sqs\_max\_batch\_window](#input\_pipe\_sqs\_max\_batch\_window) | n/a | `number` | `2` | no | | [project](#input\_project) | The name of the tfscaffold project | `string` | n/a | yes | | [region](#input\_region) | The AWS Region | `string` | n/a | yes | diff --git a/infrastructure/terraform/components/callbacks/module_client_delivery.tf b/infrastructure/terraform/components/callbacks/module_client_delivery.tf index cce31bd5..5a8e7974 100644 --- a/infrastructure/terraform/components/callbacks/module_client_delivery.tf +++ b/infrastructure/terraform/components/callbacks/module_client_delivery.tf @@ -21,8 +21,8 @@ module "client_delivery" { applications_map_parameter_name = local.applications_map_parameter_name - lambda_s3_bucket = local.acct.s3_buckets["lambda_function_artefacts"]["id"] - lambda_code_base_path = local.aws_lambda_functions_dir_path + delivery_lambda_s3_bucket = local.acct.s3_buckets["lambda_function_artefacts"]["id"] + delivery_lambda_code_base_path = local.aws_lambda_functions_dir_path force_lambda_code_deploy = var.force_lambda_code_deploy log_level = var.log_level @@ -41,7 +41,12 @@ module "client_delivery" { mtls_ca_s3_key = local.mtls_ca_s3_key # gitleaks:allow token_bucket_burst_capacity = var.token_bucket_burst_capacity + cb_cooldown_period_ms = var.cb_cooldown_period_ms + cb_recovery_period_ms = var.cb_recovery_period_ms - vpc_subnet_ids = try(local.acct.private_subnets[local.bc_name], []) - lambda_security_group_id = aws_security_group.https_client_lambda.id + delivery_lambda_batch_size = var.delivery_lambda_batch_size + delivery_lambda_batching_window_sec = var.delivery_lambda_batching_window_sec + + vpc_subnet_ids = try(local.acct.private_subnets[local.bc_name], []) + 
delivery_lambda_security_group_id = aws_security_group.https_client_lambda.id } diff --git a/infrastructure/terraform/components/callbacks/module_perf_runner_lambda.tf b/infrastructure/terraform/components/callbacks/module_perf_runner_lambda.tf index 7a77c40c..a7bf92db 100644 --- a/infrastructure/terraform/components/callbacks/module_perf_runner_lambda.tf +++ b/infrastructure/terraform/components/callbacks/module_perf_runner_lambda.tf @@ -93,7 +93,23 @@ data "aws_iam_policy_document" "perf_runner_lambda" { resources = [ module.sqs_inbound_event.sqs_queue_arn, - "${module.sqs_inbound_event.sqs_queue_arn}-dlq", + module.sqs_inbound_event.sqs_dlq_arn, + "arn:aws:sqs:${var.region}:${var.aws_account_id}:${local.csi}-*-delivery-queue", + "arn:aws:sqs:${var.region}:${var.aws_account_id}:${local.csi}-*-delivery-dlq-queue", + ] + } + + statement { + sid = "SQSGetQueueAttributes" + effect = "Allow" + + actions = [ + "sqs:GetQueueAttributes", + ] + + resources = [ + module.sqs_inbound_event.sqs_queue_arn, + module.sqs_inbound_event.sqs_dlq_arn, "arn:aws:sqs:${var.region}:${var.aws_account_id}:${local.csi}-*-delivery-queue", "arn:aws:sqs:${var.region}:${var.aws_account_id}:${local.csi}-*-delivery-dlq-queue", ] diff --git a/infrastructure/terraform/components/callbacks/pre.sh b/infrastructure/terraform/components/callbacks/pre.sh index 39eb0817..c8041ef6 100755 --- a/infrastructure/terraform/components/callbacks/pre.sh +++ b/infrastructure/terraform/components/callbacks/pre.sh @@ -6,16 +6,22 @@ script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" # shellcheck source=_paths.sh source "${script_dir}/_paths.sh" -# Resolve deploy_mock_clients and deploy_perf_runner from tfvars; base_path/group/region/environment are in scope from terraform.sh +# Resolve tfvar overrides +tfvar_value() { + local key="$1" file="$2" + # Extract the value after '=', stripping surrounding whitespace and quotes + grep -E "^\s*${key}\s*=" "${file}" | tail -1 | sed 's/.*=\s*//;s/\s*$//;s/^"//;s/"$//' 
+} + deploy_mock_clients="false" deploy_perf_runner="false" for _tfvar_file in \ "${base_path}/etc/group_${group}.tfvars" \ "${base_path}/etc/env_${region}_${environment}.tfvars"; do - if [[ -f "${_tfvar_file}" ]]; then - _val=$(grep -E '^\s*deploy_mock_clients\s*=' "${_tfvar_file}" | tail -1 | sed 's/.*=\s*//;s/\s*$//') + if [ -f "${_tfvar_file}" ]; then + _val=$(tfvar_value deploy_mock_clients "${_tfvar_file}") [ -n "${_val}" ] && deploy_mock_clients="${_val}" - _val=$(grep -E '^\s*deploy_perf_runner\s*=' "${_tfvar_file}" | tail -1 | sed 's/.*=\s*//;s/\s*$//') + _val=$(tfvar_value deploy_perf_runner "${_tfvar_file}") [ -n "${_val}" ] && deploy_perf_runner="${_val}" fi done diff --git a/infrastructure/terraform/components/callbacks/variables.tf b/infrastructure/terraform/components/callbacks/variables.tf index 68e4eafd..44b2347f 100644 --- a/infrastructure/terraform/components/callbacks/variables.tf +++ b/infrastructure/terraform/components/callbacks/variables.tf @@ -102,7 +102,7 @@ variable "pipe_log_level" { variable "pipe_sqs_input_batch_size" { type = number - default = 1 + default = 10 } variable "pipe_sqs_max_batch_window" { @@ -213,3 +213,27 @@ variable "token_bucket_burst_capacity" { description = "Token bucket burst capacity used by the rate limiter" default = 2250 } + +variable "cb_cooldown_period_ms" { + type = number + description = "Full block duration after circuit opens, before half-open probes begin (ms)" + default = 120000 +} + +variable "cb_recovery_period_ms" { + type = number + description = "Linear ramp-up duration after circuit closes (ms)" + default = 600000 +} + +variable "delivery_lambda_batch_size" { + type = number + description = "Number of SQS messages per delivery Lambda invocation" + default = 100 +} + +variable "delivery_lambda_batching_window_sec" { + type = number + description = "Maximum time in seconds to wait for a full batch before invoking the delivery Lambda" + default = 1 +} diff --git 
a/infrastructure/terraform/modules/client-delivery/README.md b/infrastructure/terraform/modules/client-delivery/README.md index 22b98e26..2debff89 100644 --- a/infrastructure/terraform/modules/client-delivery/README.md +++ b/infrastructure/terraform/modules/client-delivery/README.md @@ -11,11 +11,20 @@ No requirements. |------|-------------|------|---------|:--------:| | [applications\_map\_parameter\_name](#input\_applications\_map\_parameter\_name) | SSM Parameter Store path for the clientId-to-applicationData map | `string` | n/a | yes | | [aws\_account\_id](#input\_aws\_account\_id) | Account ID | `string` | n/a | yes | +| [cb\_cooldown\_period\_ms](#input\_cb\_cooldown\_period\_ms) | Full block duration after circuit opens, before half-open probes begin (ms) | `number` | `120000` | no | +| [cb\_recovery\_period\_ms](#input\_cb\_recovery\_period\_ms) | Linear ramp-up duration after circuit closes (ms) | `number` | `600000` | no | | [client\_bus\_name](#input\_client\_bus\_name) | EventBridge bus name for subscription rules | `string` | n/a | yes | | [client\_config\_bucket](#input\_client\_config\_bucket) | S3 bucket name containing client subscription configuration | `string` | n/a | yes | | [client\_config\_bucket\_arn](#input\_client\_config\_bucket\_arn) | S3 bucket ARN containing client subscription configuration | `string` | n/a | yes | | [client\_id](#input\_client\_id) | Unique identifier for this client | `string` | n/a | yes | | [component](#input\_component) | Component name | `string` | n/a | yes | +| [delivery\_lambda\_batch\_size](#input\_delivery\_lambda\_batch\_size) | Number of SQS messages per Lambda invocation | `number` | `100` | no | +| [delivery\_lambda\_batching\_window\_sec](#input\_delivery\_lambda\_batching\_window\_sec) | Maximum time in seconds to wait for a full batch before invoking Lambda. Allows the delivery queue to fill to batch\_size, improving Lambda concurrency utilisation. 
| `number` | `1` | no | +| [delivery\_lambda\_code\_base\_path](#input\_delivery\_lambda\_code\_base\_path) | Base path to Lambda source code directories | `string` | n/a | yes | +| [delivery\_lambda\_memory](#input\_delivery\_lambda\_memory) | Lambda memory allocation in MB | `number` | `256` | no | +| [delivery\_lambda\_s3\_bucket](#input\_delivery\_lambda\_s3\_bucket) | S3 bucket for Lambda function artefacts | `string` | n/a | yes | +| [delivery\_lambda\_security\_group\_id](#input\_delivery\_lambda\_security\_group\_id) | Security group ID for the Lambda function | `string` | `""` | no | +| [delivery\_lambda\_timeout](#input\_delivery\_lambda\_timeout) | Lambda timeout in seconds | `number` | `30` | no | | [elasticache\_cache\_name](#input\_elasticache\_cache\_name) | ElastiCache cache name for SigV4 token presigning | `string` | `""` | no | | [elasticache\_endpoint](#input\_elasticache\_endpoint) | ElastiCache Serverless endpoint URL | `string` | `""` | no | | [elasticache\_iam\_username](#input\_elasticache\_iam\_username) | IAM username for ElastiCache authentication | `string` | `""` | no | @@ -24,12 +33,6 @@ No requirements. 
| [force\_lambda\_code\_deploy](#input\_force\_lambda\_code\_deploy) | Force Lambda code redeployment even when commit tag matches | `bool` | `false` | no | | [group](#input\_group) | The name of the tfscaffold group | `string` | `null` | no | | [kms\_key\_arn](#input\_kms\_key\_arn) | KMS Key ARN for encryption at rest | `string` | n/a | yes | -| [lambda\_batch\_size](#input\_lambda\_batch\_size) | Number of SQS messages per Lambda invocation | `number` | `10` | no | -| [lambda\_code\_base\_path](#input\_lambda\_code\_base\_path) | Base path to Lambda source code directories | `string` | n/a | yes | -| [lambda\_memory](#input\_lambda\_memory) | Lambda memory allocation in MB | `number` | `256` | no | -| [lambda\_s3\_bucket](#input\_lambda\_s3\_bucket) | S3 bucket for Lambda function artefacts | `string` | n/a | yes | -| [lambda\_security\_group\_id](#input\_lambda\_security\_group\_id) | Security group ID for the Lambda function | `string` | `""` | no | -| [lambda\_timeout](#input\_lambda\_timeout) | Lambda timeout in seconds | `number` | `30` | no | | [log\_destination\_arn](#input\_log\_destination\_arn) | Firehose destination ARN for log forwarding | `string` | `""` | no | | [log\_level](#input\_log\_level) | Log level for the Lambda function | `string` | `"INFO"` | no | | [log\_retention\_in\_days](#input\_log\_retention\_in\_days) | CloudWatch log retention period in days | `number` | `0` | no | diff --git a/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf b/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf index 2f8e3c28..5a9fbae3 100644 --- a/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf +++ b/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf @@ -82,7 +82,7 @@ data "aws_iam_policy_document" "https_client_lambda" { } dynamic "statement" { - for_each = var.lambda_security_group_id != "" ? [1] : [] + for_each = var.delivery_lambda_security_group_id != "" ? 
[1] : [] content { sid = "VPCNetworkInterfacePermissions" effect = "Allow" diff --git a/infrastructure/terraform/modules/client-delivery/module_https_client_lambda.tf b/infrastructure/terraform/modules/client-delivery/module_https_client_lambda.tf index a1bb48f2..3f9ddb70 100644 --- a/infrastructure/terraform/modules/client-delivery/module_https_client_lambda.tf +++ b/infrastructure/terraform/modules/client-delivery/module_https_client_lambda.tf @@ -18,14 +18,14 @@ module "https_client_lambda" { body = data.aws_iam_policy_document.https_client_lambda.json } - function_s3_bucket = var.lambda_s3_bucket - function_code_base_path = var.lambda_code_base_path + function_s3_bucket = var.delivery_lambda_s3_bucket + function_code_base_path = var.delivery_lambda_code_base_path function_code_dir = "https-client-lambda/dist" function_include_common = true handler_function_name = "handler" runtime = "nodejs22.x" - memory = var.lambda_memory - timeout = var.lambda_timeout + memory = var.delivery_lambda_memory + timeout = var.delivery_lambda_timeout log_level = var.log_level force_lambda_code_deploy = var.force_lambda_code_deploy @@ -53,19 +53,22 @@ module "https_client_lambda" { MTLS_CERT_S3_KEY = var.mtls_cert_s3_key # gitleaks:allow QUEUE_URL = module.sqs_delivery.sqs_queue_url TOKEN_BUCKET_BURST_CAPACITY = tostring(var.token_bucket_burst_capacity) + CB_COOLDOWN_PERIOD_MS = tostring(var.cb_cooldown_period_ms) + CB_RECOVERY_PERIOD_MS = tostring(var.cb_recovery_period_ms) } - vpc_config = var.lambda_security_group_id != "" ? { + vpc_config = var.delivery_lambda_security_group_id != "" ? 
{ subnet_ids = var.vpc_subnet_ids - security_group_ids = [var.lambda_security_group_id] + security_group_ids = [var.delivery_lambda_security_group_id] } : null } resource "aws_lambda_event_source_mapping" "sqs_delivery" { - event_source_arn = module.sqs_delivery.sqs_queue_arn - function_name = module.https_client_lambda.function_arn - batch_size = var.lambda_batch_size - enabled = true + event_source_arn = module.sqs_delivery.sqs_queue_arn + function_name = module.https_client_lambda.function_arn + batch_size = var.delivery_lambda_batch_size + maximum_batching_window_in_seconds = var.delivery_lambda_batching_window_sec + enabled = true function_response_types = ["ReportBatchItemFailures"] } diff --git a/infrastructure/terraform/modules/client-delivery/variables.tf b/infrastructure/terraform/modules/client-delivery/variables.tf index 46f66f45..ebdcdc75 100644 --- a/infrastructure/terraform/modules/client-delivery/variables.tf +++ b/infrastructure/terraform/modules/client-delivery/variables.tf @@ -75,12 +75,12 @@ variable "applications_map_parameter_name" { description = "SSM Parameter Store path for the clientId-to-applicationData map" } -variable "lambda_s3_bucket" { +variable "delivery_lambda_s3_bucket" { type = string description = "S3 bucket for Lambda function artefacts" } -variable "lambda_code_base_path" { +variable "delivery_lambda_code_base_path" { type = string description = "Base path to Lambda source code directories" } @@ -115,19 +115,25 @@ variable "log_subscription_role_arn" { default = "" } -variable "lambda_batch_size" { +variable "delivery_lambda_batch_size" { type = number description = "Number of SQS messages per Lambda invocation" - default = 10 + default = 100 } -variable "lambda_memory" { +variable "delivery_lambda_batching_window_sec" { + type = number + description = "Maximum time in seconds to wait for a full batch before invoking Lambda. Allows the delivery queue to fill to batch_size, improving Lambda concurrency utilisation." 
+ default = 1 +} + +variable "delivery_lambda_memory" { type = number description = "Lambda memory allocation in MB" default = 256 } -variable "lambda_timeout" { +variable "delivery_lambda_timeout" { type = number description = "Lambda timeout in seconds" default = 30 @@ -205,8 +211,20 @@ variable "vpc_subnet_ids" { default = [] } -variable "lambda_security_group_id" { +variable "delivery_lambda_security_group_id" { type = string description = "Security group ID for the Lambda function" default = "" } + +variable "cb_cooldown_period_ms" { + type = number + description = "Full block duration after circuit opens, before half-open probes begin (ms)" + default = 120000 +} + +variable "cb_recovery_period_ms" { + type = number + description = "Linear ramp-up duration after circuit closes (ms)" + default = 600000 +} diff --git a/lambdas/https-client-lambda/src/__tests__/handler.test.ts b/lambdas/https-client-lambda/src/__tests__/handler.test.ts index 14da4bd2..f337f7c7 100644 --- a/lambdas/https-client-lambda/src/__tests__/handler.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/handler.test.ts @@ -288,6 +288,28 @@ describe("processRecords", () => { expect(mockChangeVisibility).toHaveBeenCalledTimes(1); }); + it("caps visibility delay at SQS maximum (12 hours) for admission-denied batch", async () => { + mockAdmit.mockResolvedValue({ + allowed: false, + reason: "rate_limited", + retryAfterMs: 60_000, + effectiveRate: 10, + }); + + const record = makeRecord({ + attributes: { + ApproximateReceiveCount: "1000", + SentTimestamp: "0", + SenderId: "sender", + ApproximateFirstReceiveTimestamp: "0", + }, + }); + + await processRecords([record]); + + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 43_200); + }); + it("changes visibility once for transient failure", async () => { mockDeliverPayload.mockResolvedValue({ outcome: "transient_failure", @@ -380,8 +402,7 @@ describe("processRecords", () => { expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); 
const visibilityDelay = mockChangeVisibility.mock.calls[0]![1] as number; - expect(visibilityDelay).toBeGreaterThanOrEqual(2); - expect(visibilityDelay).toBeLessThanOrEqual(6); + expect(visibilityDelay).toBe(2); expect(mockSendToDlq).not.toHaveBeenCalled(); expect(mockDeliverPayload).not.toHaveBeenCalled(); }); @@ -398,8 +419,7 @@ describe("processRecords", () => { expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); const visibilityDelay = mockChangeVisibility.mock.calls[0]![1] as number; - expect(visibilityDelay).toBeGreaterThanOrEqual(30); - expect(visibilityDelay).toBeLessThanOrEqual(34); + expect(visibilityDelay).toBe(30); expect(mockSendToDlq).not.toHaveBeenCalled(); expect(mockDeliverPayload).not.toHaveBeenCalled(); }); diff --git a/lambdas/https-client-lambda/src/handler.ts b/lambdas/https-client-lambda/src/handler.ts index 782777f2..4c195ed5 100644 --- a/lambdas/https-client-lambda/src/handler.ts +++ b/lambdas/https-client-lambda/src/handler.ts @@ -43,11 +43,12 @@ import { flushMetrics, resetMetrics } from "services/delivery-metrics"; type RedisClientType = Awaited>; const DEFAULT_MAX_RETRY_DURATION_MS = 7_200_000; // 2 hours -const DEFAULT_CONCURRENCY_LIMIT = 5; +const DEFAULT_CONCURRENCY_LIMIT = 10; const BURST_MULTIPLIER = 5; const MAX_BURST_CAPACITY = Number( process.env.TOKEN_BUCKET_BURST_CAPACITY ?? 
"2250", ); +const SQS_MAX_VISIBILITY_TIMEOUT_SEC = 43_200; // 12 hours const gateConfig: EndpointGateConfig = { // Max tokens the bucket can hold — absorbs short traffic bursts without throttling @@ -118,6 +119,15 @@ async function deliverRecord( clientId: string, ): Promise<{ success: boolean; dlq: boolean }> { const correlationId = extractCorrelationId(message); + const receiveCount = Number(record.attributes.ApproximateReceiveCount); + + logger.info("Processing delivery record", { + correlationId, + receiveCount, + firstReceivedAt: new Date( + Number(record.attributes.ApproximateFirstReceiveTimestamp), + ).toISOString(), + }); const maxRetryDurationMs = target.delivery?.maxRetryDurationSeconds === undefined @@ -147,7 +157,7 @@ async function deliverRecord( message.targetId, correlationId, record.messageId, - Number(record.attributes.ApproximateReceiveCount), + receiveCount, ); const deliveryStart = Date.now(); const result = await deliverPayload(target, payloadJson, signature, agent); @@ -171,7 +181,6 @@ async function deliverRecord( } if (result.outcome === OUTCOME_RATE_LIMITED) { - const receiveCount = Number(record.attributes.ApproximateReceiveCount); recordDeliveryRateLimited(clientId, message.targetId, correlationId); await handleRateLimitedRecord( record, @@ -183,7 +192,6 @@ async function deliverRecord( return { success: true, dlq: false }; } - const receiveCount = Number(record.attributes.ApproximateReceiveCount); const backoffSec = jitteredBackoffSeconds(receiveCount); recordDeliveryFailure( clientId, @@ -209,14 +217,17 @@ async function handleBatchDenied( reason: string, retryAfterMs: number, ): Promise { - const delaySec = Math.ceil(retryAfterMs / 1000); + const baseDelaySec = Math.max(1, Math.ceil(retryAfterMs / 1000)); const correlationIds = batch.messages.map((m) => extractCorrelationId(m)); recordAdmissionDenied(clientId, batch.targetId, reason, correlationIds); const failures: SQSBatchItemFailure[] = []; for (const record of batch.records) { - // 
eslint-disable-next-line sonarjs/pseudo-random -- jitter for backoff, not security-sensitive - const jitterSec = Math.floor(Math.random() * 5); - await changeVisibility(record.receiptHandle, delaySec + jitterSec); + const receiveCount = Number(record.attributes.ApproximateReceiveCount); + const delaySec = Math.min( + receiveCount * baseDelaySec, + SQS_MAX_VISIBILITY_TIMEOUT_SEC, + ); + await changeVisibility(record.receiptHandle, delaySec); failures.push({ itemIdentifier: record.messageId }); } return { failures, deliveredCount: 0, dlqCount: 0 }; @@ -264,23 +275,30 @@ async function processTargetBatch( const failures: SQSBatchItemFailure[] = []; let processingFailures = 0; + const admittedPairs = admitted.map( + (record, i): { record: SQSRecord; message: CallbackDeliveryMessage } => ({ + record, + message: admittedMessages[i], // eslint-disable-line security/detect-object-injection -- i is the numeric index from .map(), not user input + }), + ); + const deliveryResults = await pMap( - admitted, - async ( + admittedPairs, + async ({ + message, record, - index, - ): Promise<{ record: SQSRecord; success: boolean; dlq: boolean }> => { + }): Promise<{ record: SQSRecord; success: boolean; dlq: boolean }> => { try { const outcome = await deliverRecord( record, - admittedMessages[index], + message, target, applicationId, clientId, ); return { record, success: outcome.success, dlq: outcome.dlq }; } catch (error) { - const correlationId = extractCorrelationId(admittedMessages[index]); + const correlationId = extractCorrelationId(message); logger.error("Failed to process record", { messageId: record.messageId, correlationId, @@ -348,7 +366,9 @@ async function processTargetBatch( rejectedCorrelationIds, ); for (const record of rejected) { - await changeVisibility(record.receiptHandle, 1); + const receiveCount = Number(record.attributes.ApproximateReceiveCount); + const delaySec = receiveCount * 1; + await changeVisibility(record.receiptHandle, delaySec); failures.push({ 
itemIdentifier: record.messageId }); } } diff --git a/lambdas/mock-webhook-lambda/src/__tests__/index.test.ts b/lambdas/mock-webhook-lambda/src/__tests__/index.test.ts index d7463722..fefa87ed 100644 --- a/lambdas/mock-webhook-lambda/src/__tests__/index.test.ts +++ b/lambdas/mock-webhook-lambda/src/__tests__/index.test.ts @@ -367,6 +367,54 @@ describe("Mock Webhook Lambda", () => { const body = JSON.parse(result.body); expect(body.message).toBe("Forced status 500"); }); + + it("should return forced status code when messageId uses timed format and deadline is in the future", async () => { + const futureMs = Date.now() + 60_000; + const callback = { + data: [ + { + type: "MessageStatus", + attributes: { + messageId: `force-500-until-${futureMs}-some-uuid`, + messageStatus: "delivered", + }, + links: { message: "some-message-link" }, + meta: { idempotencyKey: "some-idempotency-key" }, + }, + ], + }; + + const event = createMockEvent(JSON.stringify(callback)); + const result = await handler(event); + + expect(result.statusCode).toBe(500); + const body = JSON.parse(result.body); + expect(body.message).toBe("Forced status 500"); + }); + + it("should return 200 when messageId uses timed format and deadline has passed", async () => { + const pastMs = Date.now() - 60_000; + const callback = { + data: [ + { + type: "MessageStatus", + attributes: { + messageId: `force-500-until-${pastMs}-some-uuid`, + messageStatus: "delivered", + }, + links: { message: "some-message-link" }, + meta: { idempotencyKey: "some-idempotency-key" }, + }, + ], + }; + + const event = createMockEvent(JSON.stringify(callback)); + const result = await handler(event); + + expect(result.statusCode).toBe(200); + const body = JSON.parse(result.body); + expect(body.message).toBe("Callback received"); + }); }); describe("Logging", () => { diff --git a/lambdas/mock-webhook-lambda/src/index.ts b/lambdas/mock-webhook-lambda/src/index.ts index d0bf582d..ea30c0e6 100644 --- 
a/lambdas/mock-webhook-lambda/src/index.ts +++ b/lambdas/mock-webhook-lambda/src/index.ts @@ -57,153 +57,232 @@ function isClientCallbackPayload( }); } -async function buildResponse( - event: APIGatewayProxyEvent, -): Promise { - const eventWithContextFields = event as APIGatewayProxyEvent & { - rawPath?: string; - requestContext?: { - http?: { method?: string }; - elb?: { targetGroupArn: string }; - }; +type EventWithContextFields = APIGatewayProxyEvent & { + rawPath?: string; + requestContext?: { + http?: { method?: string }; + elb?: { targetGroupArn: string }; }; - const headers = Object.fromEntries( +}; + +function normalizeHeaders( + event: APIGatewayProxyEvent, +): Record { + return Object.fromEntries( Object.entries(event.headers).map(([k, v]) => [String(k).toLowerCase(), v]), ) as Record; +} - const path = event.path ?? eventWithContextFields.rawPath; +function resolveMtlsStatus( + headers: Record, + isAlbInvocation: boolean, +): boolean { + if (!isAlbInvocation) { + return false; + } - const isAlbInvocation = Boolean(eventWithContextFields.requestContext?.elb); const clientCertPresent = Boolean(headers["x-amzn-mtls-clientcert"]); - let isMtls = false; - if (isAlbInvocation) { - const certResult = verifyClientCertificate( - headers["x-amzn-mtls-clientcert"], - ); - isMtls = certResult.valid; - if (isMtls) { - logger.info("mTLS client certificate verified", { - fingerprint: headers["x-amzn-mtls-clientcert-fingerprint"] ?? "", - isMtls: true, - }); - } else { - logger.info("Mock webhook invoked without mTLS", { - isMtls: false, - clientCertPresent, - reason: certResult.reason, - }); - } + const certResult = verifyClientCertificate(headers["x-amzn-mtls-clientcert"]); + + if (certResult.valid) { + logger.info("mTLS client certificate verified", { + fingerprint: headers["x-amzn-mtls-clientcert-fingerprint"] ?? 
"", + isMtls: true, + }); + return true; } - logger.info("Mock webhook invoked", { - path, - method: event.httpMethod, - hasBody: Boolean(event.body), - isMtls, + logger.info("Mock webhook invoked without mTLS", { + isMtls: false, clientCertPresent, - "x-api-key": headers["x-api-key"], - "x-hmac-sha256-signature": headers["x-hmac-sha256-signature"], - payload: event.body, + reason: certResult.reason, }); + return false; +} +function authenticateApiKey(headers: Record): { + error: APIGatewayProxyResult | undefined; +} { const expectedApiKey = process.env.API_KEY; const providedApiKey = headers["x-api-key"]; if (!expectedApiKey || providedApiKey !== expectedApiKey) { logger.error("Unauthorized: invalid or missing x-api-key"); return { - statusCode: 401, - body: JSON.stringify({ message: "Unauthorized" }), + error: { + statusCode: 401, + body: JSON.stringify({ message: "Unauthorized" }), + }, }; } - if (!event.body) { - logger.error("No event body received"); + return { error: undefined }; +} - return { +type ParseResult = { + payload: ClientCallbackPayload | undefined; + error: APIGatewayProxyResult | undefined; +}; + +function parseError(response: APIGatewayProxyResult): ParseResult { + return { payload: undefined, error: response }; +} + +function parseAndValidateBody(body: string | null): ParseResult { + if (!body) { + logger.error("No event body received"); + return parseError({ statusCode: 400, body: JSON.stringify({ message: "No body" }), - }; + }); } try { - const parsed = JSON.parse(event.body) as unknown; - + const parsed = JSON.parse(body) as unknown; logger.info("Mock webhook parsed payload", { parsedPayload: parsed }); if (!isClientCallbackPayload(parsed)) { logger.error("Invalid message structure - missing or invalid data array"); - - return { + return parseError({ statusCode: 400, body: JSON.stringify({ message: "Invalid message structure" }), - }; + }); } if (parsed.data.length !== 1) { logger.error("Expected exactly 1 callback item in data array", { 
receivedCount: parsed.data.length, }); - - return { + return parseError({ statusCode: 400, body: JSON.stringify({ message: `Expected exactly 1 callback item, got ${parsed.data.length}`, }), - }; + }); + } + + return { payload: parsed, error: undefined }; + } catch (error) { + if (error instanceof SyntaxError) { + logger.error("Invalid JSON body", { error: error.message }); + return parseError({ + statusCode: 400, + body: JSON.stringify({ message: "Invalid JSON body" }), + }); } - const [item] = parsed.data; - const correlationId = item.meta.idempotencyKey; - const { messageId } = item.attributes; - const forcedStatusMatch = /^force-(\d{3})-/.exec(messageId); - if (forcedStatusMatch) { - const statusCode = Number(forcedStatusMatch[1]); - logger.info("Forced status code response", { + logger.error("Failed to process callback", { + error: error instanceof Error ? error.message : String(error), + }); + return parseError({ + statusCode: 500, + body: JSON.stringify({ message: "Internal server error" }), + }); + } +} + +function checkForcedStatusResponse( + messageId: string, + correlationId: string, +): { response: APIGatewayProxyResult | undefined } { + const timedMatch = /^force-(\d{3})-until-(\d+)-/.exec(messageId); + if (timedMatch) { + const statusCode = Number(timedMatch[1]); + const until = Number(timedMatch[2]); + if (Date.now() < until) { + logger.info("Timed forced status code response", { correlationId, messageId, statusCode, + until, }); return { - statusCode, - body: JSON.stringify({ message: `Forced status ${statusCode}` }), + response: { + statusCode, + body: JSON.stringify({ message: `Forced status ${statusCode}` }), + }, }; } + return { response: undefined }; + } - logger.info("Callback received", { + const permanentMatch = /^force-(\d{3})-/.exec(messageId); + if (permanentMatch) { + const statusCode = Number(permanentMatch[1]); + logger.info("Forced status code response", { correlationId, messageId, - callbackType: item.type, - path, - isMtls, - apiKey: 
providedApiKey, - signature: headers["x-hmac-sha256-signature"] ?? "", - payload: JSON.stringify(item), + statusCode, }); - return { - statusCode: 200, - body: JSON.stringify({ message: "Callback received" }), + response: { + statusCode, + body: JSON.stringify({ message: `Forced status ${statusCode}` }), + }, }; - } catch (error) { - if (error instanceof SyntaxError) { - logger.error("Invalid JSON body", { error: error.message }); + } - return { - statusCode: 400, - body: JSON.stringify({ message: "Invalid JSON body" }), - }; - } + return { response: undefined }; +} - logger.error("Failed to process callback", { - error: error instanceof Error ? error.message : String(error), - }); +async function buildResponse( + event: APIGatewayProxyEvent, +): Promise { + const eventWithContextFields = event as EventWithContextFields; + const headers = normalizeHeaders(event); + const path = event.path ?? eventWithContextFields.rawPath; + const isAlbInvocation = Boolean(eventWithContextFields.requestContext?.elb); + const clientCertPresent = Boolean(headers["x-amzn-mtls-clientcert"]); + const isMtls = resolveMtlsStatus(headers, isAlbInvocation); - return { - statusCode: 500, - body: JSON.stringify({ message: "Internal server error" }), - }; + logger.info("Mock webhook invoked", { + path, + method: event.httpMethod, + hasBody: Boolean(event.body), + isMtls, + clientCertPresent, + "x-api-key": headers["x-api-key"], + "x-hmac-sha256-signature": headers["x-hmac-sha256-signature"], + payload: event.body, + }); + + const authResult = authenticateApiKey(headers); + if (authResult.error) { + return authResult.error; + } + + const bodyResult = parseAndValidateBody(event.body); + if (bodyResult.error) { + return bodyResult.error; + } + + const [item] = bodyResult.payload!.data; + const correlationId = item.meta.idempotencyKey; + const { messageId } = item.attributes; + + const { response: forcedResponse } = checkForcedStatusResponse( + messageId, + correlationId, + ); + if 
(forcedResponse) { + return forcedResponse; } + + logger.info("Callback received", { + correlationId, + messageId, + callbackType: item.type, + path, + isMtls, + apiKey: headers["x-api-key"], + signature: headers["x-hmac-sha256-signature"] ?? "", + payload: JSON.stringify(item), + }); + + return { + statusCode: 200, + body: JSON.stringify({ message: "Callback received" }), + }; } export async function handler( diff --git a/lambdas/perf-runner-lambda/src/__tests__/event-factories.test.ts b/lambdas/perf-runner-lambda/src/__tests__/event-factories.test.ts index 1c877a17..dcecd707 100644 --- a/lambdas/perf-runner-lambda/src/__tests__/event-factories.test.ts +++ b/lambdas/perf-runner-lambda/src/__tests__/event-factories.test.ts @@ -25,6 +25,25 @@ describe("createMessageStatusEvent", () => { expect(a.id).not.toBe(b.id); expect(a.data.messageId).not.toBe(b.data.messageId); }); + + it("prefixes messageId with force-{code}- when forcedStatusCode is set", () => { + const event = createMessageStatusEvent("perf-client-1", "DELIVERED", 500); + + expect(event.data.messageId).toMatch(/^force-500-[0-9a-f-]+$/); + }); + + it("prefixes messageId with force-{code}-until-{timestamp}- when both forced fields are set", () => { + const until = Date.now() + 60_000; + const event = createMessageStatusEvent( + "perf-client-1", + "DELIVERED", + 500, + until, + ); + + const prefix = `force-500-until-${until}-`; + expect(event.data.messageId.startsWith(prefix)).toBe(true); + }); }); describe("createChannelStatusEvent", () => { @@ -39,6 +58,25 @@ describe("createChannelStatusEvent", () => { expect(event.data.messageId).toBeTruthy(); expect(event.id).toBeTruthy(); }); + + it("prefixes messageId with force-{code}- when forcedStatusCode is set", () => { + const event = createChannelStatusEvent("perf-client-2", "DELIVERED", 503); + + expect(event.data.messageId).toMatch(/^force-503-[0-9a-f-]+$/); + }); + + it("prefixes messageId with force-{code}-until-{timestamp}- when both forced fields are set", 
() => { + const until = Date.now() + 60_000; + const event = createChannelStatusEvent( + "perf-client-2", + "DELIVERED", + 503, + until, + ); + + const prefix = `force-503-until-${until}-`; + expect(event.data.messageId.startsWith(prefix)).toBe(true); + }); }); describe("createEvent", () => { @@ -65,4 +103,19 @@ describe("createEvent", () => { expect(event.type).toBe(EventTypes.CHANNEL_STATUS_PUBLISHED); expect(event.data.clientId).toBe("perf-client-2"); }); + + it("forwards forcedStatusCode and forcedStatusCodeUntilMs from the mix entry", () => { + const until = Date.now() + 60_000; + const event = createEvent({ + weight: 1, + factory: "messageStatus", + clientId: "perf-client-1", + messageStatus: "DELIVERED", + forcedStatusCode: 500, + forcedStatusCodeUntilMs: until, + }); + + const prefix = `force-500-until-${until}-`; + expect(event.data.messageId.startsWith(prefix)).toBe(true); + }); }); diff --git a/lambdas/perf-runner-lambda/src/__tests__/index.test.ts b/lambdas/perf-runner-lambda/src/__tests__/index.test.ts index 3c33bfd6..b1b5687e 100644 --- a/lambdas/perf-runner-lambda/src/__tests__/index.test.ts +++ b/lambdas/perf-runner-lambda/src/__tests__/index.test.ts @@ -1,6 +1,5 @@ import { handler } from "index"; -import type { PerformanceResult } from "types"; -import { DEFAULT_SCENARIO } from "scenario"; +import type { PerformanceResult, Scenario } from "types"; import { runPerformanceTest } from "runner"; @@ -24,9 +23,22 @@ const mockRunPerformanceTest = runPerformanceTest as jest.MockedFunction< typeof runPerformanceTest >; +const testScenario: Scenario = { + phases: [{ durationSecs: 5, targetEps: 100 }], + eventMix: [ + { + weight: 1, + factory: "messageStatus", + clientId: "perf-client-1", + messageStatus: "DELIVERED", + }, + ], + metricsIntervalSecs: 15, +}; + const mockResult: PerformanceResult = { testId: "test-id", - scenario: DEFAULT_SCENARIO, + scenario: testScenario, startedAt: "2026-04-09T10:00:00.000Z", completedAt: "2026-04-09T10:02:00.000Z", 
phases: [], @@ -53,7 +65,7 @@ beforeEach(() => { describe("handler", () => { it("calls runPerformanceTest with the provided testId and scenario", async () => { - const result = await handler({ testId: "test-id" }); + const result = await handler({ testId: "test-id", scenario: testScenario }); expect(result).toEqual(mockResult); expect(mockRunPerformanceTest).toHaveBeenCalledWith( @@ -63,7 +75,7 @@ describe("handler", () => { deliveryLogGroupPrefix: "/aws/lambda/nhs-dev-callbacks-https-client-", mockWebhookLogGroup: "/aws/lambda/nhs-dev-callbacks-mock-webhook", }), - DEFAULT_SCENARIO, + testScenario, "test-id", undefined, expect.objectContaining({ @@ -72,12 +84,14 @@ describe("handler", () => { iamUsername: "test-user", region: "eu-west-2", }), + undefined, + undefined, ); }); it("uses a custom scenario when one is provided in the event", async () => { const customScenario = { - ...DEFAULT_SCENARIO, + ...testScenario, phases: [{ durationSecs: 5, targetEps: 500 }], }; @@ -89,6 +103,8 @@ describe("handler", () => { "custom-test", undefined, expect.anything(), + undefined, + undefined, ); }); @@ -99,16 +115,18 @@ describe("handler", () => { mockRunPerformanceTest.mockRejectedValue(new Error("test failure")); - await expect(handler({ testId: "failing-test" })).rejects.toThrow( - "test failure", - ); + await expect( + handler({ testId: "failing-test", scenario: testScenario }), + ).rejects.toThrow("test failure"); expect(mockDestroy).toHaveBeenCalled(); }); it("throws when INBOUND_QUEUE_URL is missing", async () => { delete process.env.INBOUND_QUEUE_URL; - await expect(handler({ testId: "missing-queue-test" })).rejects.toThrow( + await expect( + handler({ testId: "missing-queue-test", scenario: testScenario }), + ).rejects.toThrow( "Missing required environment variable: INBOUND_QUEUE_URL", ); }); @@ -117,7 +135,9 @@ describe("handler", () => { delete process.env.TRANSFORM_FILTER_LOG_GROUP; delete process.env.AWS_REGION; - await expect(handler({ testId: 
"missing-log-group-test" })).rejects.toThrow( + await expect( + handler({ testId: "missing-log-group-test", scenario: testScenario }), + ).rejects.toThrow( "Missing required environment variable: TRANSFORM_FILTER_LOG_GROUP", ); }); @@ -125,16 +145,18 @@ describe("handler", () => { it("passes undefined deliveryLogGroupPrefix when env var is not set", async () => { delete process.env.DELIVERY_LOG_GROUP_PREFIX; - await handler({ testId: "no-prefix-test" }); + await handler({ testId: "no-prefix-test", scenario: testScenario }); expect(mockRunPerformanceTest).toHaveBeenCalledWith( expect.objectContaining({ deliveryLogGroupPrefix: undefined, }), - DEFAULT_SCENARIO, + testScenario, "no-prefix-test", undefined, expect.anything(), + undefined, + undefined, ); }); @@ -143,19 +165,21 @@ describe("handler", () => { delete process.env.ELASTICACHE_CACHE_NAME; delete process.env.ELASTICACHE_IAM_USERNAME; - await handler({ testId: "no-cache-test" }); + await handler({ testId: "no-cache-test", scenario: testScenario }); expect(mockRunPerformanceTest).toHaveBeenCalledWith( expect.anything(), - DEFAULT_SCENARIO, + testScenario, "no-cache-test", undefined, undefined, + undefined, + undefined, ); }); it("passes mockWebhookLogGroup from env var", async () => { - await handler({ testId: "webhook-test" }); + await handler({ testId: "webhook-test", scenario: testScenario }); expect(mockRunPerformanceTest).toHaveBeenCalledWith( expect.objectContaining({ @@ -165,6 +189,44 @@ describe("handler", () => { "webhook-test", undefined, expect.anything(), + undefined, + undefined, + ); + }); + + it("passes cloudWatchSettlingMs when provided in the event", async () => { + await handler({ + testId: "settling-test", + scenario: testScenario, + cloudWatchSettlingMs: 5000, + }); + + expect(mockRunPerformanceTest).toHaveBeenCalledWith( + expect.anything(), + expect.anything(), + "settling-test", + undefined, + expect.anything(), + 5000, + undefined, + ); + }); + + it("passes skipPurge when provided in the 
event", async () => { + await handler({ + testId: "skip-purge-test", + scenario: testScenario, + skipPurge: true, + }); + + expect(mockRunPerformanceTest).toHaveBeenCalledWith( + expect.anything(), + expect.anything(), + "skip-purge-test", + undefined, + expect.anything(), + undefined, + true, ); }); }); diff --git a/lambdas/perf-runner-lambda/src/__tests__/purge.test.ts b/lambdas/perf-runner-lambda/src/__tests__/purge.test.ts index 14bcf247..52832910 100644 --- a/lambdas/perf-runner-lambda/src/__tests__/purge.test.ts +++ b/lambdas/perf-runner-lambda/src/__tests__/purge.test.ts @@ -30,7 +30,7 @@ describe("deriveQueueUrls", () => { expect(urls).toEqual([ "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-inbound-event-queue", - "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-inbound-event-dlq-queue", + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-inbound-event-dlq", "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-perf-client-1-delivery-queue", "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-perf-client-1-delivery-dlq-queue", "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-perf-client-2-delivery-queue", @@ -61,7 +61,7 @@ describe("deriveQueueUrls", () => { expect(urls).toEqual([ "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-inbound-event-queue", - "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-inbound-event-dlq-queue", + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-inbound-event-dlq", "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-perf-client-1-delivery-queue", "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-perf-client-1-delivery-dlq-queue", ]); @@ -88,20 +88,8 @@ describe("purgeQueues", () => { expect(mockSend).toHaveBeenCalledTimes(2); }); - it("ignores QueueDoesNotExist errors gracefully", async () => { - const nonExistentError = Object.assign(new Error("Queue does not 
exist"), { - name: "QueueDoesNotExist", - }); - mockSend.mockRejectedValueOnce(nonExistentError); - - await expect( - purgeQueues(mockSqsClient, ["https://sqs.example.invalid/missing"]), - ).resolves.toBeUndefined(); - }); - - it("rethrows non-QueueDoesNotExist errors", async () => { - const otherError = new Error("Access denied"); - mockSend.mockRejectedValueOnce(otherError); + it("throws when a purge fails", async () => { + mockSend.mockRejectedValueOnce(new Error("Access denied")); await expect( purgeQueues(mockSqsClient, ["https://sqs.example.invalid/queue"]), diff --git a/lambdas/perf-runner-lambda/src/__tests__/runner.test.ts b/lambdas/perf-runner-lambda/src/__tests__/runner.test.ts index 622e98a4..9d7acfe5 100644 --- a/lambdas/perf-runner-lambda/src/__tests__/runner.test.ts +++ b/lambdas/perf-runner-lambda/src/__tests__/runner.test.ts @@ -12,6 +12,7 @@ import { defaultSleep, runPerformanceTest } from "runner"; import { generatePhaseLoad } from "sqs"; import { deriveQueueUrls, purgeQueues } from "purge"; +import { getQueueDepths } from "sqs-stats"; import { dumpRateLimitState, flushElastiCache } from "elasticache"; import { verifyMockWebhook } from "webhook-verify"; import { @@ -26,6 +27,7 @@ jest.mock("cloudwatch"); jest.mock("purge"); jest.mock("elasticache"); jest.mock("webhook-verify"); +jest.mock("sqs-stats"); const mockGeneratePhaseLoad = jest.mocked(generatePhaseLoad); const mockQueryMetricsSnapshot = jest.mocked(queryMetricsSnapshot); @@ -41,6 +43,7 @@ const mockPurgeQueues = jest.mocked(purgeQueues); const mockFlushElastiCache = jest.mocked(flushElastiCache); const mockDumpRateLimitState = jest.mocked(dumpRateLimitState); const mockVerifyMockWebhook = jest.mocked(verifyMockWebhook); +const mockGetQueueDepths = jest.mocked(getQueueDepths); const immediateSleep = jest.fn().mockResolvedValue(undefined); @@ -118,6 +121,16 @@ beforeEach(() => { receivedCallbacks: 0, verified: false, }); + mockGetQueueDepths.mockResolvedValue({ + timestampMs: Date.now(), + 
queues: [ + { + queueUrl: "https://sqs.example.invalid/inbound-event-queue", + visible: 100, + notVisible: 10, + }, + ], + }); immediateSleep.mockResolvedValue(undefined); }); @@ -557,6 +570,22 @@ describe("runPerformanceTest", () => { expect(mockPurgeQueues).toHaveBeenCalledTimes(2); }); + it("skips both purges when skipPurge is true", async () => { + mockQueryMetricsSnapshot.mockResolvedValue(null); + + await runPerformanceTest( + deps, + scenario, + "test-skip-purge", + immediateSleep, + undefined, + undefined, + true, + ); + + expect(mockPurgeQueues).not.toHaveBeenCalled(); + }); + it("flushes ElastiCache before and after when deps are provided", async () => { mockQueryMetricsSnapshot.mockResolvedValue(null); const elastiCacheDeps = { @@ -574,7 +603,7 @@ describe("runPerformanceTest", () => { elastiCacheDeps, ); - expect(mockFlushElastiCache).toHaveBeenCalledTimes(2); + expect(mockFlushElastiCache).toHaveBeenCalledTimes(1); expect(mockFlushElastiCache).toHaveBeenCalledWith(elastiCacheDeps); }); @@ -682,6 +711,37 @@ describe("runPerformanceTest", () => { expect(mockVerifyMockWebhook).not.toHaveBeenCalled(); expect(result.webhookVerification).toBeUndefined(); }); + + it("samples queue depths during polling and at final snapshot", async () => { + mockQueryMetricsSnapshot.mockResolvedValue(null); + + await runPerformanceTest( + deps, + scenario, + "test-queue-depths", + immediateSleep, + ); + + expect(mockGetQueueDepths).toHaveBeenCalledTimes(2); // one mid-test, one final + expect(mockGetQueueDepths).toHaveBeenCalledWith(deps.sqsClient, [ + "https://sqs.example.invalid/inbound-event-queue", + ]); + }); + + it("uses the provided cloudWatchSettlingMs instead of the default", async () => { + mockQueryMetricsSnapshot.mockResolvedValue(null); + + await runPerformanceTest( + deps, + scenario, + "test-settling", + immediateSleep, + undefined, + 5000, + ); + + expect(immediateSleep).toHaveBeenCalledWith(5000); + }); }); describe("defaultSleep", () => { diff --git 
a/lambdas/perf-runner-lambda/src/__tests__/sqs-stats.test.ts b/lambdas/perf-runner-lambda/src/__tests__/sqs-stats.test.ts new file mode 100644 index 00000000..8d2900b8 --- /dev/null +++ b/lambdas/perf-runner-lambda/src/__tests__/sqs-stats.test.ts @@ -0,0 +1,75 @@ +import type { SQSClient } from "@aws-sdk/client-sqs"; +import { getQueueDepths } from "sqs-stats"; + +describe("getQueueDepths", () => { + const mockSend = jest.fn(); + const mockSqsClient = { send: mockSend } as unknown as SQSClient; + + beforeEach(() => { + jest.clearAllMocks(); + }); + + it("returns visible and notVisible counts for each queue URL", async () => { + mockSend + .mockResolvedValueOnce({ + Attributes: { + ApproximateNumberOfMessages: "42", + ApproximateNumberOfMessagesNotVisible: "8", + }, + }) + .mockResolvedValueOnce({ + Attributes: { + ApproximateNumberOfMessages: "10", + ApproximateNumberOfMessagesNotVisible: "2", + }, + }); + + const result = await getQueueDepths(mockSqsClient, [ + "https://sqs.example.invalid/queue-a", + "https://sqs.example.invalid/queue-b", + ]); + + expect(result.queues).toHaveLength(2); + expect(result.queues[0]).toEqual({ + queueUrl: "https://sqs.example.invalid/queue-a", + visible: 42, + notVisible: 8, + }); + expect(result.queues[1]).toEqual({ + queueUrl: "https://sqs.example.invalid/queue-b", + visible: 10, + notVisible: 2, + }); + expect(result.timestampMs).toBeGreaterThan(0); + }); + + it("defaults to 0 when attributes are missing", async () => { + mockSend.mockResolvedValueOnce({ Attributes: undefined }); + + const result = await getQueueDepths(mockSqsClient, [ + "https://sqs.example.invalid/queue-a", + ]); + + expect(result.queues[0].visible).toBe(0); + expect(result.queues[0].notVisible).toBe(0); + }); + + it("sends GetQueueAttributesCommand with correct attributes requested", async () => { + mockSend.mockResolvedValueOnce({ Attributes: {} }); + + await getQueueDepths(mockSqsClient, [ + "https://sqs.example.invalid/queue-a", + ]); + + const command = 
mockSend.mock.calls[0][0] as { + input: { QueueUrl: string; AttributeNames: string[] }; + }; + expect(command.input.QueueUrl).toBe("https://sqs.example.invalid/queue-a"); + expect(command.input.AttributeNames).toContain( + "ApproximateNumberOfMessages", + ); + expect(command.input.AttributeNames).toContain( + "ApproximateNumberOfMessagesNotVisible", + ); + }); +}); diff --git a/lambdas/perf-runner-lambda/src/event-factories.ts b/lambdas/perf-runner-lambda/src/event-factories.ts index 6f39add9..c32cb9f7 100644 --- a/lambdas/perf-runner-lambda/src/event-factories.ts +++ b/lambdas/perf-runner-lambda/src/event-factories.ts @@ -8,11 +8,32 @@ import type { import { EventTypes } from "@nhs-notify-client-callbacks/models"; import type { EventMixEntry } from "types"; +function buildMessageId( + uuid: string, + forcedStatusCode?: number, + forcedStatusCodeUntilMs?: number, +): string { + if (forcedStatusCode === undefined) { + return uuid; + } + if (forcedStatusCodeUntilMs === undefined) { + return `force-${forcedStatusCode}-${uuid}`; + } + return `force-${forcedStatusCode}-until-${forcedStatusCodeUntilMs}-${uuid}`; +} + export function createMessageStatusEvent( clientId: string, messageStatus: MessageStatus, + forcedStatusCode?: number, + forcedStatusCodeUntilMs?: number, ): StatusPublishEvent { - const messageId = crypto.randomUUID(); + const uuid = crypto.randomUUID(); + const messageId = buildMessageId( + uuid, + forcedStatusCode, + forcedStatusCodeUntilMs, + ); const data: MessageStatusData = { clientId, @@ -47,8 +68,15 @@ export function createMessageStatusEvent( export function createChannelStatusEvent( clientId: string, channelStatus: ChannelStatus, + forcedStatusCode?: number, + forcedStatusCodeUntilMs?: number, ): StatusPublishEvent { - const messageId = crypto.randomUUID(); + const uuid = crypto.randomUUID(); + const messageId = buildMessageId( + uuid, + forcedStatusCode, + forcedStatusCodeUntilMs, + ); const data: ChannelStatusData = { clientId, @@ -80,8 +108,18 
@@ export function createChannelStatusEvent( export function createEvent(entry: EventMixEntry): StatusPublishEvent { if (entry.factory === "messageStatus") { - return createMessageStatusEvent(entry.clientId, entry.messageStatus); + return createMessageStatusEvent( + entry.clientId, + entry.messageStatus, + entry.forcedStatusCode, + entry.forcedStatusCodeUntilMs, + ); } - return createChannelStatusEvent(entry.clientId, entry.channelStatus); + return createChannelStatusEvent( + entry.clientId, + entry.channelStatus, + entry.forcedStatusCode, + entry.forcedStatusCodeUntilMs, + ); } diff --git a/lambdas/perf-runner-lambda/src/index.ts b/lambdas/perf-runner-lambda/src/index.ts index 5974627b..2150241e 100644 --- a/lambdas/perf-runner-lambda/src/index.ts +++ b/lambdas/perf-runner-lambda/src/index.ts @@ -2,7 +2,6 @@ import { CloudWatchLogsClient } from "@aws-sdk/client-cloudwatch-logs"; import { SQSClient } from "@aws-sdk/client-sqs"; import { Logger } from "@nhs-notify-client-callbacks/logger"; import { runPerformanceTest } from "runner"; -import { DEFAULT_SCENARIO } from "scenario"; import type { ElastiCacheDeps, PerfRunnerPayload, @@ -14,7 +13,7 @@ const logger = new Logger(); export async function handler( event: PerfRunnerPayload, ): Promise { - const { scenario = DEFAULT_SCENARIO, testId } = event; + const { cloudWatchSettlingMs, scenario, skipPurge, testId } = event; const region = process.env.AWS_REGION ?? 
"eu-west-2"; const queueUrl = process.env.INBOUND_QUEUE_URL; @@ -64,6 +63,8 @@ export async function handler( testId, undefined, elastiCacheDeps, + cloudWatchSettlingMs, + skipPurge, ); logger.info("Performance test completed", { testId }); diff --git a/lambdas/perf-runner-lambda/src/purge.ts b/lambdas/perf-runner-lambda/src/purge.ts index e363e706..3f7cb097 100644 --- a/lambdas/perf-runner-lambda/src/purge.ts +++ b/lambdas/perf-runner-lambda/src/purge.ts @@ -11,7 +11,7 @@ export function deriveQueueUrls( return [ inboundQueueUrl, - `${baseUrl}inbound-event-dlq-queue`, + `${baseUrl}inbound-event-dlq`, ...clientIds.flatMap((id) => [ `${baseUrl}${id}-delivery-queue`, `${baseUrl}${id}-delivery-dlq-queue`, @@ -23,18 +23,9 @@ export async function purgeQueues( client: SQSClient, queueUrls: string[], ): Promise { - const results = await Promise.allSettled( + await Promise.all( queueUrls.map((url) => client.send(new PurgeQueueCommand({ QueueUrl: url })), ), ); - - for (const result of results) { - if (result.status === "rejected") { - const error = result.reason as { name?: string }; - if (error.name !== "QueueDoesNotExist") { - throw result.reason as Error; - } - } - } } diff --git a/lambdas/perf-runner-lambda/src/runner.ts b/lambdas/perf-runner-lambda/src/runner.ts index 7a5b5ee6..86ccd435 100644 --- a/lambdas/perf-runner-lambda/src/runner.ts +++ b/lambdas/perf-runner-lambda/src/runner.ts @@ -11,8 +11,10 @@ import type { Scenario, WebhookVerificationResult, } from "types"; +import { Logger } from "@nhs-notify-client-callbacks/logger"; import { generatePhaseLoad } from "sqs"; import { deriveQueueUrls, purgeQueues } from "purge"; +import { getQueueDepths } from "sqs-stats"; import { dumpRateLimitState, flushElastiCache } from "elasticache"; import { verifyMockWebhook } from "webhook-verify"; import { @@ -22,6 +24,8 @@ import { queryPerClientRateTimeline, } from "cloudwatch"; +const logger = new Logger(); + const CLOUDWATCH_SETTLING_MS = 60_000; export const defaultSleep = 
(ms: number): Promise => @@ -82,12 +86,55 @@ async function collectSnapshots( return cbStartSec; } +async function collectPerClientRateTimelines( + deps: RunnerDeps, + scenario: Scenario, + startSec: number, + endSec: number, +): Promise { + if (!deps.deliveryLogGroupPrefix) { + return []; + } + + const clientIds = [...new Set(scenario.eventMix.map((e) => e.clientId))]; + const timelinePromises = clientIds.map(async (clientId) => { + const logGroupName = `${deps.deliveryLogGroupPrefix}${clientId}`; + const entries = await queryPerClientRateTimeline( + deps.cloudWatchClient, + logGroupName, + startSec, + endSec, + ); + return { clientId, entries }; + }); + const timelines = await Promise.all(timelinePromises); + return timelines.filter((t) => t.entries.length > 0); +} + +async function collectWebhookVerification( + deps: RunnerDeps, + startSec: number, + endSec: number, +): Promise { + if (!deps.mockWebhookLogGroup) { + return undefined; + } + return verifyMockWebhook( + deps.cloudWatchClient, + deps.mockWebhookLogGroup, + startSec, + endSec, + ); +} + export async function runPerformanceTest( deps: RunnerDeps, scenario: Scenario, testId: string, sleepFn: (ms: number) => Promise = defaultSleep, elastiCacheDeps?: ElastiCacheDeps, + cloudWatchSettlingMs: number = CLOUDWATCH_SETTLING_MS, + skipPurge = false, ): Promise { if (scenario.eventMix.length === 0) { throw new Error("scenario.eventMix must contain at least one entry"); @@ -109,8 +156,15 @@ export async function runPerformanceTest( const testStartMs = Date.now(); const queueUrls = deriveQueueUrls(deps.queueUrl, scenario); - await purgeQueues(deps.sqsClient, queueUrls); + + if (skipPurge) { + logger.info("Skipping queue purge", { queueUrls }); + } else { + logger.info("Purging queues", { queueUrls }); + await purgeQueues(deps.sqsClient, queueUrls); + } if (elastiCacheDeps) { + logger.info("Clearing rate limit and circuit breaker state"); await flushElastiCache(elastiCacheDeps); } @@ -148,6 +202,9 @@ export async 
function runPerformanceTest( lastCbSnapshotSec, out, ); + logger.info("Sampling queue depths", { queueUrls }); + const depthSample = await getQueueDepths(deps.sqsClient, queueUrls); + logger.info("Queue depth sample", { queues: depthSample.queues }); if (!stopPolling) { await sleepFn(scenario.metricsIntervalSecs * 1000); @@ -157,20 +214,35 @@ export async function runPerformanceTest( const pollPromise = pollLoop(); - for (const phase of scenario.phases) { + for (const [index, phase] of scenario.phases.entries()) { + logger.info("Starting phase", { + index, + targetEps: phase.targetEps, + durationSecs: phase.durationSecs, + }); const result = await generatePhaseLoad( deps.sqsClient, deps.queueUrl, phase, - scenario.eventMix, + phase.eventMix ?? scenario.eventMix, ); + logger.info("Phase complete", { + index, + targetEps: result.targetEps, + achievedEps: result.achievedEps, + sent: result.sent, + durationMs: result.durationMs, + }); phaseResults.push(result); } stopPolling = true; await pollPromise; - await sleepFn(CLOUDWATCH_SETTLING_MS); + logger.info("Waiting for CloudWatch logs to settle", { + settlingMs: cloudWatchSettlingMs, + }); + await sleepFn(cloudWatchSettlingMs); const finalStartSec = Math.floor(testStartMs / 1000); const finalEndSec = Math.floor(Date.now() / 1000); @@ -183,45 +255,33 @@ export async function runPerformanceTest( lastCbSnapshotSec, out, ); + logger.info("Sampling queue depths", { queueUrls }); + const finalDepthSample = await getQueueDepths(deps.sqsClient, queueUrls); + logger.info("Final queue depth sample", { queues: finalDepthSample.queues }); - const perClientRateTimelines: PerClientRateTimeline[] = []; - - if (deps.deliveryLogGroupPrefix) { - const clientIds = [...new Set(scenario.eventMix.map((e) => e.clientId))]; - const timelinePromises = clientIds.map(async (clientId) => { - const logGroupName = `${deps.deliveryLogGroupPrefix}${clientId}`; - const entries = await queryPerClientRateTimeline( - deps.cloudWatchClient, - logGroupName, 
- finalStartSec, - finalEndSec, - ); - return { clientId, entries }; - }); - const timelines = await Promise.all(timelinePromises); - perClientRateTimelines.push( - ...timelines.filter((t) => t.entries.length > 0), - ); - } + const perClientRateTimelines = await collectPerClientRateTimelines( + deps, + scenario, + finalStartSec, + finalEndSec, + ); - let webhookVerification: WebhookVerificationResult | undefined; - if (deps.mockWebhookLogGroup) { - webhookVerification = await verifyMockWebhook( - deps.cloudWatchClient, - deps.mockWebhookLogGroup, - finalStartSec, - finalEndSec, - ); - } + const webhookVerification = await collectWebhookVerification( + deps, + finalStartSec, + finalEndSec, + ); let rateLimitStateAfter: EndpointRateLimitState[] | undefined; if (elastiCacheDeps) { rateLimitStateAfter = await dumpRateLimitState(elastiCacheDeps); } - await purgeQueues(deps.sqsClient, queueUrls); - if (elastiCacheDeps) { - await flushElastiCache(elastiCacheDeps); + if (skipPurge) { + logger.info("Skipping final queue purge", { queueUrls }); + } else { + await purgeQueues(deps.sqsClient, queueUrls); + logger.info("Final queue purge complete", { queueUrls }); } return { diff --git a/lambdas/perf-runner-lambda/src/scenario.ts b/lambdas/perf-runner-lambda/src/scenario.ts deleted file mode 100644 index 30c7cf72..00000000 --- a/lambdas/perf-runner-lambda/src/scenario.ts +++ /dev/null @@ -1,82 +0,0 @@ -import type { Scenario } from "types"; - -export const DEFAULT_SCENARIO: Scenario = { - phases: [ - { durationSecs: 15, targetEps: 1000 }, - { durationSecs: 15, targetEps: 2000 }, - { durationSecs: 30, targetEps: 3000 }, - ], - eventMix: [ - // perf-client-1: all message statuses → all subscription paths exercised - { - weight: 4, - factory: "messageStatus", - clientId: "perf-client-1", - messageStatus: "DELIVERED", - }, - { - weight: 2, - factory: "messageStatus", - clientId: "perf-client-1", - messageStatus: "FAILED", - }, - { - weight: 1, - factory: "messageStatus", - 
clientId: "perf-client-1", - messageStatus: "SENDING", - }, - { - weight: 1, - factory: "messageStatus", - clientId: "perf-client-1", - messageStatus: "PENDING_ENRICHMENT", - }, - // perf-client-2: channel status events - { - weight: 3, - factory: "channelStatus", - clientId: "perf-client-2", - channelStatus: "DELIVERED", - }, - { - weight: 1, - factory: "channelStatus", - clientId: "perf-client-2", - channelStatus: "FAILED", - }, - { - weight: 1, - factory: "channelStatus", - clientId: "perf-client-2", - channelStatus: "RETRY", - }, - // perf-client-3: DELIVERED matches (fan-out to 2 targets); SENDING is filtered - { - weight: 2, - factory: "messageStatus", - clientId: "perf-client-3", - messageStatus: "DELIVERED", - }, - { - weight: 1, - factory: "messageStatus", - clientId: "perf-client-3", - messageStatus: "SENDING", - }, - // perf-client-4: mixed message + channel status - { - weight: 2, - factory: "messageStatus", - clientId: "perf-client-4", - messageStatus: "DELIVERED", - }, - { - weight: 1, - factory: "channelStatus", - clientId: "perf-client-4", - channelStatus: "DELIVERED", - }, - ], - metricsIntervalSecs: 15, -}; diff --git a/lambdas/perf-runner-lambda/src/sqs-stats.ts b/lambdas/perf-runner-lambda/src/sqs-stats.ts new file mode 100644 index 00000000..5d573793 --- /dev/null +++ b/lambdas/perf-runner-lambda/src/sqs-stats.ts @@ -0,0 +1,29 @@ +import { GetQueueAttributesCommand, type SQSClient } from "@aws-sdk/client-sqs"; +import type { QueueDepthSample } from "types"; + +export async function getQueueDepths( + client: SQSClient, + queueUrls: string[], +): Promise { + const queues = await Promise.all( + queueUrls.map(async (url) => { + const response = await client.send( + new GetQueueAttributesCommand({ + QueueUrl: url, + AttributeNames: [ + "ApproximateNumberOfMessages", + "ApproximateNumberOfMessagesNotVisible", + ], + }), + ); + const attrs = response.Attributes ?? {}; + return { + queueUrl: url, + visible: Number(attrs.ApproximateNumberOfMessages ?? 
"0"), + notVisible: Number(attrs.ApproximateNumberOfMessagesNotVisible ?? "0"), + }; + }), + ); + + return { timestampMs: Date.now(), queues }; +} diff --git a/lambdas/perf-runner-lambda/src/types.ts b/lambdas/perf-runner-lambda/src/types.ts index 4415ef63..8820f5dc 100644 --- a/lambdas/perf-runner-lambda/src/types.ts +++ b/lambdas/perf-runner-lambda/src/types.ts @@ -10,6 +10,8 @@ export type MessageStatusMixEntry = { factory: "messageStatus"; clientId: string; messageStatus: MessageStatus; + forcedStatusCode?: number; + forcedStatusCodeUntilMs?: number; }; export type ChannelStatusMixEntry = { @@ -17,6 +19,8 @@ export type ChannelStatusMixEntry = { factory: "channelStatus"; clientId: string; channelStatus: ChannelStatus; + forcedStatusCode?: number; + forcedStatusCodeUntilMs?: number; }; export type EventMixEntry = MessageStatusMixEntry | ChannelStatusMixEntry; @@ -24,6 +28,7 @@ export type EventMixEntry = MessageStatusMixEntry | ChannelStatusMixEntry; export type Phase = { durationSecs: number; targetEps: number; + eventMix?: EventMixEntry[]; }; export type Scenario = { @@ -97,6 +102,15 @@ export type WebhookVerificationResult = { verified: boolean; }; +export type QueueDepthSample = { + timestampMs: number; + queues: { + queueUrl: string; + visible: number; + notVisible: number; + }[]; +}; + export type PerformanceResult = { testId: string; scenario: Scenario; @@ -114,7 +128,9 @@ export type PerformanceResult = { export type PerfRunnerPayload = { testId: string; - scenario?: Scenario; + scenario: Scenario; + cloudWatchSettlingMs?: number; + skipPurge?: boolean; }; export type RunnerDeps = { diff --git a/tests/integration/delivery-resilience.test.ts b/tests/integration/delivery-resilience.test.ts index 0d6c083c..062a3dcc 100644 --- a/tests/integration/delivery-resilience.test.ts +++ b/tests/integration/delivery-resilience.test.ts @@ -108,7 +108,7 @@ describe("Delivery Resilience", () => { }); describe("Rate Limiting", () => { - const BURST_SIZE = 30; + const 
BURST_SIZE = 15; let dlqUrl: string; let deliveryUrl: string; let httpsClientLogGroup: string; diff --git a/tests/integration/fixtures/subscriptions/mock-client-rate-limit.json b/tests/integration/fixtures/subscriptions/mock-client-rate-limit.json index 21e53636..1129c337 100644 --- a/tests/integration/fixtures/subscriptions/mock-client-rate-limit.json +++ b/tests/integration/fixtures/subscriptions/mock-client-rate-limit.json @@ -29,7 +29,7 @@ }, "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", "invocationMethod": "POST", - "invocationRateLimit": 2, + "invocationRateLimit": 1, "targetId": "target-rl-001", "type": "API" } diff --git a/tests/performance/fixtures/subscriptions/perf-client-1.json b/tests/performance/fixtures/subscriptions/perf-client-1.json index 1c730b8a..161e28b1 100644 --- a/tests/performance/fixtures/subscriptions/perf-client-1.json +++ b/tests/performance/fixtures/subscriptions/perf-client-1.json @@ -36,7 +36,7 @@ }, "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", "invocationMethod": "POST", - "invocationRateLimit": 300, + "invocationRateLimit": 100, "targetId": "target-39dbd795-5909-40ab-95b2-4e88b11a2813", "type": "API" } diff --git a/tests/performance/fixtures/subscriptions/perf-client-2.json b/tests/performance/fixtures/subscriptions/perf-client-2.json index d3c58a93..d519da1e 100644 --- a/tests/performance/fixtures/subscriptions/perf-client-2.json +++ b/tests/performance/fixtures/subscriptions/perf-client-2.json @@ -46,7 +46,7 @@ }, "invocationEndpoint": "https://REPLACED_BY_TERRAFORM", "invocationMethod": "POST", - "invocationRateLimit": 300, + "invocationRateLimit": 5000, "targetId": "target-e3ccc2c2-7b19-4475-80d5-51a1182d239a", "type": "API" } From dfe1ed5d769ee916df8a5d146828347c9c664f5b Mon Sep 17 00:00:00 2001 From: Tim Marston Date: Wed, 6 May 2026 10:03:10 +0100 Subject: [PATCH 41/65] intent: generate idempotencyKey from attributes --- .../message-status-transformer.test.ts | 3 +-- .../message-status-transformer.ts | 19 
+++++-------------- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/lambdas/client-transform-filter-lambda/src/__tests__/transformers/message-status-transformer.test.ts b/lambdas/client-transform-filter-lambda/src/__tests__/transformers/message-status-transformer.test.ts index ce9b71ac..d3dd2f03 100644 --- a/lambdas/client-transform-filter-lambda/src/__tests__/transformers/message-status-transformer.test.ts +++ b/lambdas/client-transform-filter-lambda/src/__tests__/transformers/message-status-transformer.test.ts @@ -53,8 +53,6 @@ describe("message-status-transformer", () => { messageId: "msg-789-xyz", messageReference: "client-ref-12345", messageStatus: "delivered", - messageStatusDescription: "Message successfully delivered", - messageFailureReasonCode: undefined, channels: [ { type: "nhsapp", channelStatus: "delivered" }, { type: "sms", channelStatus: "skipped" }, @@ -65,6 +63,7 @@ describe("message-status-transformer", () => { version: "ztoe2qRAM8M8vS0bqajhyEBcvXacrGPp", createdDate: "2023-11-17T14:27:51.413Z", }, + messageStatusDescription: "Message successfully delivered", }; const expectedIdempotencyKey = createHash("sha256") .update(JSON.stringify(idempotencyBody)) diff --git a/lambdas/client-transform-filter-lambda/src/services/transformers/message-status-transformer.ts b/lambdas/client-transform-filter-lambda/src/services/transformers/message-status-transformer.ts index a2803568..049cfb02 100644 --- a/lambdas/client-transform-filter-lambda/src/services/transformers/message-status-transformer.ts +++ b/lambdas/client-transform-filter-lambda/src/services/transformers/message-status-transformer.ts @@ -24,20 +24,6 @@ export function transformMessageStatus( }), ); - const idempotencyBody = { - messageId, - messageReference: data.messageReference, - messageStatus, - messageStatusDescription: data.messageStatusDescription, - messageFailureReasonCode: data.messageFailureReasonCode, - channels, - routingPlan: data.routingPlan, - }; - - const 
idempotencyKey = createHash("sha256") - .update(JSON.stringify(idempotencyBody)) - .digest("hex"); - const attributes: MessageStatusAttributes = { messageId, messageReference: data.messageReference, @@ -55,6 +41,11 @@ export function transformMessageStatus( attributes.messageFailureReasonCode = data.messageFailureReasonCode; } + const { timestamp: _, ...idempotencyBody } = attributes; + const idempotencyKey = createHash("sha256") + .update(JSON.stringify(idempotencyBody)) + .digest("hex"); + const payload: ClientCallbackPayload = { data: [ { From 768d110f04e8f950ce6f5d34849b3a75235077b1 Mon Sep 17 00:00:00 2001 From: rhyscoxnhs Date: Wed, 6 May 2026 11:15:32 +0100 Subject: [PATCH 42/65] CCM-16073 - Initial work on infra refactor (#177) * CCM-16073 - Initial work on infra refactor * CCM-16073 - PR feedback * CCM-16073 - PR feedback * CCM-16073 - PR feedback --- .github/actions/acceptance-tests/action.yaml | 3 +- .../terraform/components/callbacks/README.md | 5 - .../terraform/components/callbacks/locals.tf | 12 +- .../components/callbacks/locals_tfscaffold.tf | 4 +- .../callbacks/module_client_delivery.tf | 12 +- .../components/callbacks/module_kms.tf | 9 +- .../callbacks/module_mock_webhook_lambda.tf | 2 +- .../callbacks/module_perf_runner_lambda.tf | 17 +-- .../callbacks/module_sqs_inbound_event.tf | 4 +- .../module_transform_filter_lambda.tf | 8 +- .../callbacks/s3_bucket_client_config.tf | 104 --------------- .../callbacks/s3_object_applications_map.tf | 13 ++ .../callbacks/s3_object_client_config.tf | 12 ++ ..._certs.tf => s3_object_mtls_test_certs.tf} | 69 +--------- .../ssm_parameter_applications_map.tf | 19 --- .../callbacks/sync-client-config.sh | 5 +- .../components/callbacks/variables.tf | 18 --- .../modules/client-delivery/README.md | 4 +- .../client-delivery/iam_role_sqs_target.tf | 8 +- .../module_https_client_lambda.tf | 7 +- .../modules/client-delivery/variables.tf | 15 ++- .../src/__tests__/services/metrics.test.ts | 4 +- 
lambdas/https-client-lambda/package.json | 2 +- .../src/__tests__/applications-map.test.ts | 118 +++++++++++++++++ .../src/__tests__/handler.test.ts | 2 +- .../__tests__/ssm-applications-map.test.ts | 117 ----------------- lambdas/https-client-lambda/src/handler.ts | 2 +- .../src/services/applications-map.ts | 73 +++++++++++ .../src/services/ssm-applications-map.ts | 69 ---------- .../src/__tests__/cloudwatch.test.ts | 2 +- .../src/__tests__/index.test.ts | 18 +-- .../src/__tests__/purge.test.ts | 48 +++++-- .../src/__tests__/runner.test.ts | 19 ++- lambdas/perf-runner-lambda/src/index.ts | 2 + lambdas/perf-runner-lambda/src/purge.ts | 10 +- lambdas/perf-runner-lambda/src/runner.ts | 6 +- lambdas/perf-runner-lambda/src/types.ts | 1 + pnpm-lock.yaml | 67 ++-------- pnpm-workspace.yaml | 2 +- scripts/tests/integration-debug.sh | 13 +- scripts/tests/integration-local.sh | 2 +- tests/integration/delivery-resilience.test.ts | 4 +- tests/integration/dlq-alarms.test.ts | 4 +- tests/integration/helpers/sqs.ts | 15 ++- tests/integration/helpers/test-context.ts | 3 + tests/integration/metrics.test.ts | 2 +- tests/test-support/helpers/deployment.ts | 23 ++-- .../package.json | 2 +- .../src/__tests__/aws.test.ts | 95 ++++++++++---- .../src/__tests__/container.test.ts | 2 + .../cli/applications-map-add.test.ts | 27 ++-- .../cli/applications-map-get.test.ts | 21 +-- .../__tests__/entrypoint/cli/helper.test.ts | 37 ++++-- ...ap.test.ts => s3-applications-map.test.ts} | 68 +++++----- .../src/__tests__/repository/s3.test.ts | 54 ++++++++ .../src/aws.ts | 124 ++++++++++-------- .../entrypoint/cli/applications-map-add.ts | 16 +-- .../entrypoint/cli/applications-map-get.ts | 12 +- .../src/entrypoint/cli/helper.ts | 64 ++++++--- .../src/repository/s3-applications-map.ts | 69 ++++++++++ .../src/repository/s3.ts | 14 +- .../src/repository/ssm-applications-map.ts | 78 ----------- 62 files changed, 837 insertions(+), 824 deletions(-) delete mode 100644 
infrastructure/terraform/components/callbacks/s3_bucket_client_config.tf create mode 100644 infrastructure/terraform/components/callbacks/s3_object_applications_map.tf create mode 100644 infrastructure/terraform/components/callbacks/s3_object_client_config.tf rename infrastructure/terraform/components/callbacks/{s3_bucket_mtls_test_certs.tf => s3_object_mtls_test_certs.tf} (69%) delete mode 100644 infrastructure/terraform/components/callbacks/ssm_parameter_applications_map.tf create mode 100644 lambdas/https-client-lambda/src/__tests__/applications-map.test.ts delete mode 100644 lambdas/https-client-lambda/src/__tests__/ssm-applications-map.test.ts create mode 100644 lambdas/https-client-lambda/src/services/applications-map.ts delete mode 100644 lambdas/https-client-lambda/src/services/ssm-applications-map.ts rename tools/client-subscriptions-management/src/__tests__/repository/{ssm-applications-map.test.ts => s3-applications-map.test.ts} (67%) create mode 100644 tools/client-subscriptions-management/src/repository/s3-applications-map.ts delete mode 100644 tools/client-subscriptions-management/src/repository/ssm-applications-map.ts diff --git a/.github/actions/acceptance-tests/action.yaml b/.github/actions/acceptance-tests/action.yaml index 7fe3c28a..6f6a44ba 100644 --- a/.github/actions/acceptance-tests/action.yaml +++ b/.github/actions/acceptance-tests/action.yaml @@ -49,6 +49,7 @@ runs: shell: bash env: PROJECT: nhs - COMPONENT: ${{ inputs.targetComponent }} + COMPONENT: cb + CLIENT_COMPONENT: cbc run: | make test-${{ inputs.testType }} diff --git a/infrastructure/terraform/components/callbacks/README.md b/infrastructure/terraform/components/callbacks/README.md index ec0f13e8..1265dfce 100644 --- a/infrastructure/terraform/components/callbacks/README.md +++ b/infrastructure/terraform/components/callbacks/README.md @@ -15,7 +15,6 @@ | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| 
[applications\_map\_parameter\_name](#input\_applications\_map\_parameter\_name) | SSM Parameter Store path for the clientId-to-applicationData map, where applicationData is currently only the applicationId | `string` | `null` | no | | [aws\_account\_id](#input\_aws\_account\_id) | The AWS Account ID (numeric) | `string` | n/a | yes | | [cb\_cooldown\_period\_ms](#input\_cb\_cooldown\_period\_ms) | Full block duration after circuit opens, before half-open probes begin (ms) | `number` | `120000` | no | | [cb\_recovery\_period\_ms](#input\_cb\_recovery\_period\_ms) | Linear ramp-up duration after circuit closes (ms) | `number` | `600000` | no | @@ -39,7 +38,6 @@ | [log\_retention\_in\_days](#input\_log\_retention\_in\_days) | The retention period in days for the Cloudwatch Logs events to be retained, default of 0 is indefinite | `number` | `0` | no | | [message\_root\_uri](#input\_message\_root\_uri) | The root URI used for constructing message links in callback payloads | `string` | n/a | yes | | [mtls\_ca\_s3\_key](#input\_mtls\_ca\_s3\_key) | S3 key for the CA certificate PEM bundle used for server verification | `string` | `""` | no | -| [mtls\_cert\_s3\_bucket](#input\_mtls\_cert\_s3\_bucket) | S3 bucket containing the mTLS client certificate bundle | `string` | `""` | no | | [mtls\_cert\_s3\_key](#input\_mtls\_cert\_s3\_key) | S3 key for the mTLS client certificate PEM bundle | `string` | `""` | no | | [parent\_acct\_environment](#input\_parent\_acct\_environment) | Name of the environment responsible for the acct resources used, affects things like DNS zone. 
Useful for named dev environments | `string` | `"main"` | no | | [pipe\_event\_patterns](#input\_pipe\_event\_patterns) | value | `list(string)` | `[]` | no | @@ -48,7 +46,6 @@ | [pipe\_sqs\_max\_batch\_window](#input\_pipe\_sqs\_max\_batch\_window) | n/a | `number` | `2` | no | | [project](#input\_project) | The name of the tfscaffold project | `string` | n/a | yes | | [region](#input\_region) | The AWS Region | `string` | n/a | yes | -| [s3\_enable\_force\_destroy](#input\_s3\_enable\_force\_destroy) | Whether to enable force destroy for the S3 buckets created in this module | `bool` | `false` | no | | [sqs\_inbound\_event\_max\_receive\_count](#input\_sqs\_inbound\_event\_max\_receive\_count) | n/a | `number` | `3` | no | | [sqs\_inbound\_event\_visibility\_timeout\_seconds](#input\_sqs\_inbound\_event\_visibility\_timeout\_seconds) | n/a | `number` | `60` | no | | [token\_bucket\_burst\_capacity](#input\_token\_bucket\_burst\_capacity) | Token bucket burst capacity used by the rate limiter | `number` | `2250` | no | @@ -56,12 +53,10 @@ | Name | Source | Version | |------|--------|---------| -| [client\_config\_bucket](#module\_client\_config\_bucket) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-s3bucket.zip | n/a | | [client\_delivery](#module\_client\_delivery) | ../../modules/client-delivery | n/a | | [client\_transform\_filter\_lambda](#module\_client\_transform\_filter\_lambda) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-lambda.zip | n/a | | [kms](#module\_kms) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-kms.zip | n/a | | [mock\_webhook\_lambda](#module\_mock\_webhook\_lambda) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-lambda.zip | n/a | -| [mtls\_test\_certs\_bucket](#module\_mtls\_test\_certs\_bucket) | 
https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-s3bucket.zip | n/a | | [perf\_runner\_lambda](#module\_perf\_runner\_lambda) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-lambda.zip | n/a | | [sqs\_inbound\_event](#module\_sqs\_inbound\_event) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-sqs.zip | n/a | ## Outputs diff --git a/infrastructure/terraform/components/callbacks/locals.tf b/infrastructure/terraform/components/callbacks/locals.tf index 68129a5b..20c735fc 100644 --- a/infrastructure/terraform/components/callbacks/locals.tf +++ b/infrastructure/terraform/components/callbacks/locals.tf @@ -1,9 +1,11 @@ locals { bc_name = "client-callbacks" + component = "cb" + client_csi = "${var.project}-${var.environment}-cbc" aws_lambda_functions_dir_path = "../../../../lambdas" log_destination_arn = "arn:aws:firehose:${var.region}:${var.aws_account_id}:deliverystream/nhs-main-obs-splunk-logs-firehose" - root_domain_name = "${var.environment}.${local.acct.route53_zone_names["client-callbacks"]}" # e.g. [main|dev|abxy0].smsnudge.[dev|nonprod|prod].nhsnotify.national.nhs.uk - root_domain_id = local.acct.route53_zone_ids["client-callbacks"] + root_domain_name = "${var.environment}.${local.acct.route53_zone_names[local.bc_name]}" # e.g. [main|dev|abxy0].smsnudge.[dev|nonprod|prod].nhsnotify.national.nhs.uk + root_domain_id = local.acct.route53_zone_ids[local.bc_name] clients_dir_path = "${path.module}/../../modules/clients" @@ -64,7 +66,9 @@ locals { ]...) 
} - applications_map_parameter_name = coalesce(var.applications_map_parameter_name, "/${var.project}/${var.environment}/${var.component}/applications-map") + applications_map_s3_key = "${var.environment}/applications-map.json" - client_config_bucket_arn = "arn:aws:s3:::${var.project}-${var.aws_account_id}-${var.region}-${var.environment}-${var.component}-subscription-config" + client_config_s3_bucket = local.acct.additional_s3_buckets["client-callbacks_client-configs"].name + applications_map_s3_bucket = local.acct.additional_s3_buckets["client-callbacks_apps-map"].name + client_config_bucket_arn = local.acct.additional_s3_buckets["client-callbacks_client-configs"].arn } diff --git a/infrastructure/terraform/components/callbacks/locals_tfscaffold.tf b/infrastructure/terraform/components/callbacks/locals_tfscaffold.tf index b7cf3217..5206fd1a 100644 --- a/infrastructure/terraform/components/callbacks/locals_tfscaffold.tf +++ b/infrastructure/terraform/components/callbacks/locals_tfscaffold.tf @@ -11,7 +11,7 @@ locals { "%s-%s-%s", var.project, var.environment, - var.component, + local.component, ), "_", "", @@ -25,7 +25,7 @@ locals { var.aws_account_id, var.region, var.environment, - var.component, + local.component, ), "_", "", diff --git a/infrastructure/terraform/components/callbacks/module_client_delivery.tf b/infrastructure/terraform/components/callbacks/module_client_delivery.tf index 5a8e7974..fb95f01d 100644 --- a/infrastructure/terraform/components/callbacks/module_client_delivery.tf +++ b/infrastructure/terraform/components/callbacks/module_client_delivery.tf @@ -5,7 +5,7 @@ module "client_delivery" { project = var.project aws_account_id = var.aws_account_id region = var.region - component = var.component + component = "cbc" environment = var.environment group = var.group @@ -16,10 +16,12 @@ module "client_delivery" { subscriptions = local.client_subscriptions[each.key] subscription_targets = local.client_subscription_targets[each.key] - 
client_config_bucket = module.client_config_bucket.bucket - client_config_bucket_arn = module.client_config_bucket.arn + client_config_bucket = local.client_config_s3_bucket + client_config_bucket_arn = local.client_config_bucket_arn + client_config_key_prefix = "${var.environment}/client_subscriptions/" - applications_map_parameter_name = local.applications_map_parameter_name + applications_map_s3_bucket = local.applications_map_s3_bucket + applications_map_s3_key = local.applications_map_s3_key delivery_lambda_s3_bucket = local.acct.s3_buckets["lambda_function_artefacts"]["id"] delivery_lambda_code_base_path = local.aws_lambda_functions_dir_path @@ -34,7 +36,7 @@ module "client_delivery" { elasticache_endpoint = aws_elasticache_serverless_cache.delivery_state.endpoint[0].address elasticache_cache_name = aws_elasticache_serverless_cache.delivery_state.name - elasticache_iam_username = "${var.project}-${var.environment}-${var.component}-elasticache-user" + elasticache_iam_username = "${var.project}-${var.environment}-${local.component}-elasticache-user" mtls_cert_s3_bucket = local.mtls_cert_s3_bucket mtls_cert_s3_key = local.mtls_cert_s3_key # gitleaks:allow diff --git a/infrastructure/terraform/components/callbacks/module_kms.tf b/infrastructure/terraform/components/callbacks/module_kms.tf index 327b5641..af8fddae 100644 --- a/infrastructure/terraform/components/callbacks/module_kms.tf +++ b/infrastructure/terraform/components/callbacks/module_kms.tf @@ -2,7 +2,7 @@ module "kms" { source = "https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-kms.zip" aws_account_id = var.aws_account_id - component = var.component + component = local.component environment = var.environment project = var.project region = var.region @@ -64,9 +64,10 @@ data "aws_iam_policy_document" "kms" { test = "ArnLike" variable = "kms:EncryptionContext:aws:sqs:arn" values = [ - 
"arn:aws:sqs:${var.region}:${var.aws_account_id}:${var.project}-${var.environment}-callbacks-inbound-event-queue", - "arn:aws:sqs:${var.region}:${var.aws_account_id}:${var.project}-${var.environment}-callbacks-inbound-event-dlq", - "arn:aws:sqs:${var.region}:${var.aws_account_id}:${var.project}-${var.environment}-callbacks-*-dlq-queue" #wildcard here so that DLQs for clients can also use this key + "arn:aws:sqs:${var.region}:${var.aws_account_id}:${var.project}-${var.environment}-${local.component}-inbound-event-queue", + "arn:aws:sqs:${var.region}:${var.aws_account_id}:${var.project}-${var.environment}-${local.component}-inbound-event-dlq", + "arn:aws:sqs:${var.region}:${var.aws_account_id}:${local.client_csi}-*-delivery-queue", + "arn:aws:sqs:${var.region}:${var.aws_account_id}:${local.client_csi}-*-delivery-dlq-queue", ] } } diff --git a/infrastructure/terraform/components/callbacks/module_mock_webhook_lambda.tf b/infrastructure/terraform/components/callbacks/module_mock_webhook_lambda.tf index 467dc1c6..6f2a5f9b 100644 --- a/infrastructure/terraform/components/callbacks/module_mock_webhook_lambda.tf +++ b/infrastructure/terraform/components/callbacks/module_mock_webhook_lambda.tf @@ -6,7 +6,7 @@ module "mock_webhook_lambda" { description = "Mock webhook endpoint for integration testing - logs received callbacks to CloudWatch" aws_account_id = var.aws_account_id - component = var.component + component = local.component environment = var.environment project = var.project region = var.region diff --git a/infrastructure/terraform/components/callbacks/module_perf_runner_lambda.tf b/infrastructure/terraform/components/callbacks/module_perf_runner_lambda.tf index a7bf92db..aee63a91 100644 --- a/infrastructure/terraform/components/callbacks/module_perf_runner_lambda.tf +++ b/infrastructure/terraform/components/callbacks/module_perf_runner_lambda.tf @@ -6,7 +6,7 @@ module "perf_runner_lambda" { description = "Lambda function that executes performance tests against the 
client callbacks pipeline from within AWS" aws_account_id = var.aws_account_id - component = var.component + component = local.component environment = var.environment project = var.project region = var.region @@ -38,12 +38,13 @@ module "perf_runner_lambda" { lambda_env_vars = { ENVIRONMENT = var.environment INBOUND_QUEUE_URL = module.sqs_inbound_event.sqs_queue_url + DELIVERY_QUEUE_URL_PREFIX = "https://sqs.${var.region}.amazonaws.com/${var.aws_account_id}/${local.client_csi}-" TRANSFORM_FILTER_LOG_GROUP = module.client_transform_filter_lambda.cloudwatch_log_group_name - DELIVERY_LOG_GROUP_PREFIX = "/aws/lambda/${local.csi}-https-client-" + DELIVERY_LOG_GROUP_PREFIX = "/aws/lambda/${local.client_csi}-https-client-" MOCK_WEBHOOK_LOG_GROUP = var.deploy_mock_clients ? module.mock_webhook_lambda[0].cloudwatch_log_group_name : "" ELASTICACHE_ENDPOINT = aws_elasticache_serverless_cache.delivery_state.endpoint[0].address ELASTICACHE_CACHE_NAME = aws_elasticache_serverless_cache.delivery_state.name - ELASTICACHE_IAM_USERNAME = "${var.project}-${var.environment}-${var.component}-elasticache-user" + ELASTICACHE_IAM_USERNAME = "${var.project}-${var.environment}-${local.component}-elasticache-user" } vpc_config = { @@ -94,8 +95,8 @@ data "aws_iam_policy_document" "perf_runner_lambda" { resources = [ module.sqs_inbound_event.sqs_queue_arn, module.sqs_inbound_event.sqs_dlq_arn, - "arn:aws:sqs:${var.region}:${var.aws_account_id}:${local.csi}-*-delivery-queue", - "arn:aws:sqs:${var.region}:${var.aws_account_id}:${local.csi}-*-delivery-dlq-queue", + "arn:aws:sqs:${var.region}:${var.aws_account_id}:${local.client_csi}-*-delivery-queue", + "arn:aws:sqs:${var.region}:${var.aws_account_id}:${local.client_csi}-*-delivery-dlq-queue", ] } @@ -110,8 +111,8 @@ data "aws_iam_policy_document" "perf_runner_lambda" { resources = [ module.sqs_inbound_event.sqs_queue_arn, module.sqs_inbound_event.sqs_dlq_arn, - "arn:aws:sqs:${var.region}:${var.aws_account_id}:${local.csi}-*-delivery-queue", - 
"arn:aws:sqs:${var.region}:${var.aws_account_id}:${local.csi}-*-delivery-dlq-queue", + "arn:aws:sqs:${var.region}:${var.aws_account_id}:${local.client_csi}-*-delivery-queue", + "arn:aws:sqs:${var.region}:${var.aws_account_id}:${local.client_csi}-*-delivery-dlq-queue", ] } @@ -127,7 +128,7 @@ data "aws_iam_policy_document" "perf_runner_lambda" { resources = concat( [ "arn:aws:logs:${var.region}:${var.aws_account_id}:log-group:${module.client_transform_filter_lambda.cloudwatch_log_group_name}:*", - "arn:aws:logs:${var.region}:${var.aws_account_id}:log-group:/aws/lambda/${local.csi}-https-client-*", + "arn:aws:logs:${var.region}:${var.aws_account_id}:log-group:/aws/lambda/${local.client_csi}-https-client-*", ], var.deploy_mock_clients ? [ "arn:aws:logs:${var.region}:${var.aws_account_id}:log-group:${module.mock_webhook_lambda[0].cloudwatch_log_group_name}:*", diff --git a/infrastructure/terraform/components/callbacks/module_sqs_inbound_event.tf b/infrastructure/terraform/components/callbacks/module_sqs_inbound_event.tf index 2e3080fe..2a15e357 100644 --- a/infrastructure/terraform/components/callbacks/module_sqs_inbound_event.tf +++ b/infrastructure/terraform/components/callbacks/module_sqs_inbound_event.tf @@ -2,7 +2,7 @@ module "sqs_inbound_event" { source = "https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-sqs.zip" aws_account_id = var.aws_account_id - component = var.component + component = local.component environment = var.environment project = var.project region = var.region @@ -33,7 +33,7 @@ data "aws_iam_policy_document" "sqs_inbound_event" { ] resources = [ - "arn:aws:sqs:${var.region}:${var.aws_account_id}:${var.project}-${var.environment}-${var.component}-inbound-event-queue" + "arn:aws:sqs:${var.region}:${var.aws_account_id}:${var.project}-${var.environment}-${local.component}-inbound-event-queue" ] condition { diff --git a/infrastructure/terraform/components/callbacks/module_transform_filter_lambda.tf 
b/infrastructure/terraform/components/callbacks/module_transform_filter_lambda.tf index 2b75ddd5..e6aeb5f6 100644 --- a/infrastructure/terraform/components/callbacks/module_transform_filter_lambda.tf +++ b/infrastructure/terraform/components/callbacks/module_transform_filter_lambda.tf @@ -5,7 +5,7 @@ module "client_transform_filter_lambda" { description = "Lambda function that transforms and filters events coming to through the eventpipe" aws_account_id = var.aws_account_id - component = var.component + component = local.component environment = var.environment project = var.project region = var.region @@ -37,9 +37,9 @@ module "client_transform_filter_lambda" { lambda_env_vars = { ENVIRONMENT = var.environment - METRICS_NAMESPACE = "nhs-notify-client-callbacks" - CLIENT_SUBSCRIPTION_CONFIG_BUCKET = module.client_config_bucket.bucket - CLIENT_SUBSCRIPTION_CONFIG_PREFIX = "client_subscriptions/" + METRICS_NAMESPACE = "nhs-notify-cb" + CLIENT_SUBSCRIPTION_CONFIG_BUCKET = local.client_config_s3_bucket + CLIENT_SUBSCRIPTION_CONFIG_PREFIX = "${var.environment}/client_subscriptions/" CLIENT_SUBSCRIPTION_CACHE_TTL_SECONDS = "60" MESSAGE_ROOT_URI = var.message_root_uri } diff --git a/infrastructure/terraform/components/callbacks/s3_bucket_client_config.tf b/infrastructure/terraform/components/callbacks/s3_bucket_client_config.tf deleted file mode 100644 index 9943affd..00000000 --- a/infrastructure/terraform/components/callbacks/s3_bucket_client_config.tf +++ /dev/null @@ -1,104 +0,0 @@ -resource "aws_s3_object" "mock_client_config" { - for_each = var.deploy_mock_clients ? 
toset(keys(local.config_clients)) : toset([]) - - bucket = module.client_config_bucket.id - key = "client_subscriptions/${local.config_clients[each.key].clientId}.json" - content = jsonencode(local.enriched_mock_config_clients[each.key]) - - kms_key_id = module.kms.key_arn - server_side_encryption = "aws:kms" - - content_type = "application/json" -} - -module "client_config_bucket" { - source = "https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-s3bucket.zip" - - name = "subscription-config" - - aws_account_id = var.aws_account_id - component = var.component - environment = var.environment - project = var.project - region = var.region - - default_tags = merge( - local.default_tags, - { - Description = "Client subscription configuration storage" - } - ) - - kms_key_arn = module.kms.key_arn - force_destroy = var.s3_enable_force_destroy - versioning = true - object_ownership = "BucketOwnerPreferred" - bucket_key_enabled = true - - policy_documents = [ - data.aws_iam_policy_document.client_config_bucket.json - ] -} - -data "aws_iam_policy_document" "client_config_bucket" { - statement { - sid = "AllowLambdaListAccess" - effect = "Allow" - - principals { - type = "AWS" - identifiers = [module.client_transform_filter_lambda.iam_role_arn] - } - - actions = [ - "s3:ListBucket", - ] - - resources = [ - local.client_config_bucket_arn, - ] - } - - statement { - sid = "AllowLambdaReadAccess" - effect = "Allow" - - principals { - type = "AWS" - identifiers = [module.client_transform_filter_lambda.iam_role_arn] - } - - actions = [ - "s3:GetObject", - ] - - resources = [ - "${local.client_config_bucket_arn}/*", - ] - } - - statement { - sid = "DenyInsecureTransport" - effect = "Deny" - - principals { - type = "*" - identifiers = ["*"] - } - - actions = [ - "s3:*", - ] - - resources = [ - local.client_config_bucket_arn, - "${local.client_config_bucket_arn}/*" - ] - - condition { - test = "Bool" - variable = "aws:SecureTransport" - values = 
["false"] - } - } -} diff --git a/infrastructure/terraform/components/callbacks/s3_object_applications_map.tf b/infrastructure/terraform/components/callbacks/s3_object_applications_map.tf new file mode 100644 index 00000000..9911ede3 --- /dev/null +++ b/infrastructure/terraform/components/callbacks/s3_object_applications_map.tf @@ -0,0 +1,13 @@ +resource "aws_s3_object" "applications_map" { + bucket = local.applications_map_s3_bucket + key = local.applications_map_s3_key + content = jsonencode(var.deploy_mock_clients ? { for client_id, client in local.config_clients : client.clientId => "${client.clientId}-app-id" } : {}) + content_type = "application/json" + kms_key_id = module.kms.key_arn + + server_side_encryption = "aws:kms" + + lifecycle { + ignore_changes = [content] + } +} diff --git a/infrastructure/terraform/components/callbacks/s3_object_client_config.tf b/infrastructure/terraform/components/callbacks/s3_object_client_config.tf new file mode 100644 index 00000000..1f0a6c74 --- /dev/null +++ b/infrastructure/terraform/components/callbacks/s3_object_client_config.tf @@ -0,0 +1,12 @@ +resource "aws_s3_object" "mock_client_config" { + for_each = var.deploy_mock_clients ?
toset(keys(local.config_clients)) : toset([]) + + bucket = local.client_config_s3_bucket + key = "${var.environment}/client_subscriptions/${local.config_clients[each.key].clientId}.json" + content = jsonencode(local.enriched_mock_config_clients[each.key]) + + kms_key_id = module.kms.key_arn + server_side_encryption = "aws:kms" + + content_type = "application/json" +} diff --git a/infrastructure/terraform/components/callbacks/s3_bucket_mtls_test_certs.tf b/infrastructure/terraform/components/callbacks/s3_object_mtls_test_certs.tf similarity index 69% rename from infrastructure/terraform/components/callbacks/s3_bucket_mtls_test_certs.tf rename to infrastructure/terraform/components/callbacks/s3_object_mtls_test_certs.tf index c29806cd..72678dbb 100644 --- a/infrastructure/terraform/components/callbacks/s3_bucket_mtls_test_certs.tf +++ b/infrastructure/terraform/components/callbacks/s3_object_mtls_test_certs.tf @@ -1,67 +1,8 @@ -module "mtls_test_certs_bucket" { - count = var.deploy_mock_clients ? 1 : 0 - source = "https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/3.0.7/terraform-s3bucket.zip" - - name = "mtls-test-certs" - - aws_account_id = var.aws_account_id - component = var.component - environment = var.environment - project = var.project - region = var.region - - default_tags = merge( - local.default_tags, - { - Description = "mTLS test certificate material for non-production callback delivery" - } - ) - - kms_key_arn = module.kms.key_arn - force_destroy = var.s3_enable_force_destroy - versioning = false - object_ownership = "BucketOwnerPreferred" - bucket_key_enabled = true - - policy_documents = [ - data.aws_iam_policy_document.mtls_test_certs_bucket[0].json - ] -} - -data "aws_iam_policy_document" "mtls_test_certs_bucket" { - count = var.deploy_mock_clients ? 
1 : 0 - - statement { - sid = "DenyInsecureTransport" - effect = "Deny" - - principals { - type = "*" - identifiers = ["*"] - } - - actions = [ - "s3:*", - ] - - resources = [ - "arn:aws:s3:::${var.project}-${var.aws_account_id}-${var.region}-${var.environment}-${var.component}-mtls-test-certs", - "arn:aws:s3:::${var.project}-${var.aws_account_id}-${var.region}-${var.environment}-${var.component}-mtls-test-certs/*" - ] - - condition { - test = "Bool" - variable = "aws:SecureTransport" - values = ["false"] - } - } -} - locals { - mtls_test_certs_s3_prefix = "callbacks/mtls-test" + mtls_test_certs_s3_prefix = "${var.environment}/callbacks/mtls-test" mtls_test_cert_s3_key = "${local.mtls_test_certs_s3_prefix}/client-bundle.pem" mtls_test_ca_s3_key = "${local.mtls_test_certs_s3_prefix}/ca.pem" - mtls_cert_s3_bucket = var.deploy_mock_clients ? module.mtls_test_certs_bucket[0].bucket : var.mtls_cert_s3_bucket + mtls_cert_s3_bucket = local.acct.additional_s3_buckets["client-callbacks_certs"].name mtls_cert_s3_key = var.deploy_mock_clients ? local.mtls_test_cert_s3_key : var.mtls_cert_s3_key # gitleaks:allow mtls_ca_s3_key = var.deploy_mock_clients ? local.mtls_test_ca_s3_key : var.mtls_ca_s3_key # gitleaks:allow } @@ -158,20 +99,22 @@ resource "tls_locally_signed_cert" "mock_server" { resource "aws_s3_object" "mtls_test_client_bundle" { count = var.deploy_mock_clients ? 1 : 0 - bucket = module.mtls_test_certs_bucket[0].id + bucket = local.mtls_cert_s3_bucket key = local.mtls_test_cert_s3_key # gitleaks:allow content = "${tls_locally_signed_cert.test_client[0].cert_pem}${tls_private_key.test_client[0].private_key_pem}" + kms_key_id = module.kms.key_arn server_side_encryption = "aws:kms" content_type = "application/x-pem-file" } resource "aws_s3_object" "mtls_test_ca" { count = var.deploy_mock_clients ? 
1 : 0 - bucket = module.mtls_test_certs_bucket[0].id + bucket = local.mtls_cert_s3_bucket key = local.mtls_test_ca_s3_key # gitleaks:allow content = tls_self_signed_cert.test_ca[0].cert_pem + kms_key_id = module.kms.key_arn server_side_encryption = "aws:kms" content_type = "application/x-pem-file" } diff --git a/infrastructure/terraform/components/callbacks/ssm_parameter_applications_map.tf b/infrastructure/terraform/components/callbacks/ssm_parameter_applications_map.tf deleted file mode 100644 index 567647d1..00000000 --- a/infrastructure/terraform/components/callbacks/ssm_parameter_applications_map.tf +++ /dev/null @@ -1,19 +0,0 @@ -resource "random_password" "mock_application_id" { - for_each = var.deploy_mock_clients ? toset(keys(local.config_clients)) : toset([]) - length = 24 - special = false -} - -resource "aws_ssm_parameter" "applications_map" { - name = local.applications_map_parameter_name - type = "SecureString" - key_id = module.kms.key_arn - - value = var.deploy_mock_clients ? 
jsonencode({ - for id in keys(local.config_clients) : local.config_clients[id].clientId => random_password.mock_application_id[id].result - }) : jsonencode({}) - - lifecycle { - ignore_changes = [value] - } -} diff --git a/infrastructure/terraform/components/callbacks/sync-client-config.sh b/infrastructure/terraform/components/callbacks/sync-client-config.sh index 2c2a3ecb..9226cf01 100755 --- a/infrastructure/terraform/components/callbacks/sync-client-config.sh +++ b/infrastructure/terraform/components/callbacks/sync-client-config.sh @@ -18,9 +18,10 @@ cd "${repo_root}" rm -f "${clients_dir}"/*.json -bucket_name="nhs-${AWS_ACCOUNT_ID}-${AWS_REGION}-${ENVIRONMENT}-callbacks-subscription-config" -s3_prefix="client_subscriptions/" +bucket_name="nhs-${AWS_ACCOUNT_ID}-${AWS_REGION}-main-acct-clie-client-configs" + +s3_prefix="${ENVIRONMENT}/client_subscriptions/" echo "Seeding client configs from s3://${bucket_name}/${s3_prefix} for ${ENVIRONMENT}/${AWS_REGION}" diff --git a/infrastructure/terraform/components/callbacks/variables.tf b/infrastructure/terraform/components/callbacks/variables.tf index 44b2347f..2dbb9efb 100644 --- a/infrastructure/terraform/components/callbacks/variables.tf +++ b/infrastructure/terraform/components/callbacks/variables.tf @@ -172,24 +172,6 @@ variable "message_root_uri" { description = "The root URI used for constructing message links in callback payloads" } -variable "applications_map_parameter_name" { - type = string - default = null - description = "SSM Parameter Store path for the clientId-to-applicationData map, where applicationData is currently only the applicationId" -} - -variable "s3_enable_force_destroy" { - type = bool - description = "Whether to enable force destroy for the S3 buckets created in this module" - default = false -} - -variable "mtls_cert_s3_bucket" { - type = string - description = "S3 bucket containing the mTLS client certificate bundle" - default = "" -} - variable "mtls_cert_s3_key" { type = string description 
= "S3 key for the mTLS client certificate PEM bundle" diff --git a/infrastructure/terraform/modules/client-delivery/README.md b/infrastructure/terraform/modules/client-delivery/README.md index 2debff89..f8c742a2 100644 --- a/infrastructure/terraform/modules/client-delivery/README.md +++ b/infrastructure/terraform/modules/client-delivery/README.md @@ -9,13 +9,15 @@ No requirements. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [applications\_map\_parameter\_name](#input\_applications\_map\_parameter\_name) | SSM Parameter Store path for the clientId-to-applicationData map | `string` | n/a | yes | +| [applications\_map\_s3\_bucket](#input\_applications\_map\_s3\_bucket) | S3 bucket containing the applications map JSON | `string` | n/a | yes | +| [applications\_map\_s3\_key](#input\_applications\_map\_s3\_key) | S3 key for the applications map JSON file | `string` | n/a | yes | | [aws\_account\_id](#input\_aws\_account\_id) | Account ID | `string` | n/a | yes | | [cb\_cooldown\_period\_ms](#input\_cb\_cooldown\_period\_ms) | Full block duration after circuit opens, before half-open probes begin (ms) | `number` | `120000` | no | | [cb\_recovery\_period\_ms](#input\_cb\_recovery\_period\_ms) | Linear ramp-up duration after circuit closes (ms) | `number` | `600000` | no | | [client\_bus\_name](#input\_client\_bus\_name) | EventBridge bus name for subscription rules | `string` | n/a | yes | | [client\_config\_bucket](#input\_client\_config\_bucket) | S3 bucket name containing client subscription configuration | `string` | n/a | yes | | [client\_config\_bucket\_arn](#input\_client\_config\_bucket\_arn) | S3 bucket ARN containing client subscription configuration | `string` | n/a | yes | +| [client\_config\_key\_prefix](#input\_client\_config\_key\_prefix) | S3 key prefix for client subscription configuration files | `string` | `"client_subscriptions/"` | no | | [client\_id](#input\_client\_id) | Unique identifier 
for this client | `string` | n/a | yes | | [component](#input\_component) | Component name | `string` | n/a | yes | | [delivery\_lambda\_batch\_size](#input\_delivery\_lambda\_batch\_size) | Number of SQS messages per Lambda invocation | `number` | `100` | no | diff --git a/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf b/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf index 5a9fbae3..f9abf363 100644 --- a/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf +++ b/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf @@ -43,15 +43,15 @@ data "aws_iam_policy_document" "https_client_lambda" { } statement { - sid = "SSMGetApplicationsMap" + sid = "S3ApplicationsMapReadAccess" effect = "Allow" actions = [ - "ssm:GetParameter", + "s3:GetObject", ] resources = [ - "arn:aws:ssm:${var.region}:${var.aws_account_id}:parameter${var.applications_map_parameter_name}", + "arn:aws:s3:::${var.applications_map_s3_bucket}/${var.applications_map_s3_key}", ] } @@ -64,7 +64,7 @@ data "aws_iam_policy_document" "https_client_lambda" { ] resources = [ - "${var.client_config_bucket_arn}/client_subscriptions/*", + "${var.client_config_bucket_arn}/${var.client_config_key_prefix}*", ] } diff --git a/infrastructure/terraform/modules/client-delivery/module_https_client_lambda.tf b/infrastructure/terraform/modules/client-delivery/module_https_client_lambda.tf index 3f9ddb70..3abd532c 100644 --- a/infrastructure/terraform/modules/client-delivery/module_https_client_lambda.tf +++ b/infrastructure/terraform/modules/client-delivery/module_https_client_lambda.tf @@ -36,18 +36,19 @@ module "https_client_lambda" { log_subscription_role_arn = var.log_subscription_role_arn lambda_env_vars = { - APPLICATIONS_MAP_PARAMETER = var.applications_map_parameter_name + APPLICATIONS_MAP_S3_BUCKET = var.applications_map_s3_bucket + APPLICATIONS_MAP_S3_KEY = var.applications_map_s3_key CLIENT_ID = var.client_id 
CLIENT_SUBSCRIPTION_CACHE_TTL_SECONDS = "60" CLIENT_SUBSCRIPTION_CONFIG_BUCKET = var.client_config_bucket - CLIENT_SUBSCRIPTION_CONFIG_PREFIX = "client_subscriptions/" + CLIENT_SUBSCRIPTION_CONFIG_PREFIX = var.client_config_key_prefix DLQ_URL = module.dlq_delivery.sqs_queue_url ELASTICACHE_CACHE_NAME = var.elasticache_cache_name ELASTICACHE_ENDPOINT = var.elasticache_endpoint ELASTICACHE_IAM_USERNAME = var.elasticache_iam_username ENVIRONMENT = var.environment MAX_RETRY_DURATION_SECONDS = tostring(var.max_retry_duration_seconds) - METRICS_NAMESPACE = "nhs-notify-client-callbacks" + METRICS_NAMESPACE = "nhs-notify-cb" MTLS_CA_S3_KEY = var.mtls_ca_s3_key # gitleaks:allow MTLS_CERT_S3_BUCKET = var.mtls_cert_s3_bucket MTLS_CERT_S3_KEY = var.mtls_cert_s3_key # gitleaks:allow diff --git a/infrastructure/terraform/modules/client-delivery/variables.tf b/infrastructure/terraform/modules/client-delivery/variables.tf index ebdcdc75..fde3edbb 100644 --- a/infrastructure/terraform/modules/client-delivery/variables.tf +++ b/infrastructure/terraform/modules/client-delivery/variables.tf @@ -70,9 +70,20 @@ variable "client_config_bucket_arn" { description = "S3 bucket ARN containing client subscription configuration" } -variable "applications_map_parameter_name" { +variable "client_config_key_prefix" { type = string - description = "SSM Parameter Store path for the clientId-to-applicationData map" + description = "S3 key prefix for client subscription configuration files" + default = "client_subscriptions/" +} + +variable "applications_map_s3_bucket" { + type = string + description = "S3 bucket containing the applications map JSON" +} + +variable "applications_map_s3_key" { + type = string + description = "S3 key for the applications map JSON file" } variable "delivery_lambda_s3_bucket" { diff --git a/lambdas/client-transform-filter-lambda/src/__tests__/services/metrics.test.ts b/lambdas/client-transform-filter-lambda/src/__tests__/services/metrics.test.ts index 3e1a419e..f600cae1 
100644 --- a/lambdas/client-transform-filter-lambda/src/__tests__/services/metrics.test.ts +++ b/lambdas/client-transform-filter-lambda/src/__tests__/services/metrics.test.ts @@ -44,7 +44,7 @@ describe("createMetricsLogger", () => { }); it("should throw if ENVIRONMENT is not set", () => { - process.env.METRICS_NAMESPACE = "nhs-notify-client-callbacks-metrics"; + process.env.METRICS_NAMESPACE = "nhs-notify-cb"; expect(() => createMetricLogger()).toThrow( "ENVIRONMENT environment variable is not set", @@ -61,7 +61,7 @@ describe("createMetricsLogger", () => { }); it("should use ENVIRONMENT environment variable", () => { - process.env.METRICS_NAMESPACE = "nhs-notify-client-callbacks-metrics"; + process.env.METRICS_NAMESPACE = "nhs-notify-cb"; process.env.ENVIRONMENT = "production"; createMetricLogger(); diff --git a/lambdas/https-client-lambda/package.json b/lambdas/https-client-lambda/package.json index 88b36769..8082b859 100644 --- a/lambdas/https-client-lambda/package.json +++ b/lambdas/https-client-lambda/package.json @@ -3,7 +3,6 @@ "@aws-crypto/sha256-js": "catalog:aws", "@aws-sdk/client-s3": "catalog:aws", "@aws-sdk/client-sqs": "catalog:aws", - "@aws-sdk/client-ssm": "catalog:aws", "@aws-sdk/credential-providers": "catalog:aws", "@smithy/signature-v4": "catalog:aws", "@nhs-notify-client-callbacks/config-subscription-cache": "workspace:*", @@ -16,6 +15,7 @@ "p-map": "catalog:app" }, "devDependencies": { + "@smithy/types": "catalog:aws", "@tsconfig/node22": "catalog:tools", "@types/aws-lambda": "catalog:tools", "@types/jest": "catalog:test", diff --git a/lambdas/https-client-lambda/src/__tests__/applications-map.test.ts b/lambdas/https-client-lambda/src/__tests__/applications-map.test.ts new file mode 100644 index 00000000..5c5b2f63 --- /dev/null +++ b/lambdas/https-client-lambda/src/__tests__/applications-map.test.ts @@ -0,0 +1,118 @@ +import { GetObjectCommand } from "@aws-sdk/client-s3"; +import type { SdkStream } from "@smithy/types"; + +import { 
getApplicationId, resetCache } from "services/applications-map"; + +const mockSend = jest.fn(); +jest.mock("@aws-sdk/client-s3", () => { + const actual = jest.requireActual("@aws-sdk/client-s3"); + return { + ...actual, + S3Client: jest.fn().mockImplementation(() => ({ + send: (...args: unknown[]) => mockSend(...args), + })), + }; +}); + +jest.mock("@nhs-notify-client-callbacks/logger", () => ({ + logger: { + info: jest.fn(), + warn: jest.fn(), + error: jest.fn(), + debug: jest.fn(), + }, +})); + +process.env.APPLICATIONS_MAP_S3_BUCKET = "test-bucket"; +process.env.APPLICATIONS_MAP_S3_KEY = "test/applications-map.json"; + +const mockBody = (content: string) => + ({ + transformToString: jest.fn().mockResolvedValue(content), + }) as unknown as SdkStream; + +describe("getApplicationId", () => { + beforeEach(() => { + mockSend.mockReset(); + resetCache(); + }); + + it("returns correct applicationId for a known clientId", async () => { + mockSend.mockResolvedValue({ + Body: mockBody( + JSON.stringify({ "client-1": "app-id-1", "client-2": "app-id-2" }), + ), + }); + + const result = await getApplicationId("client-1"); + + expect(result).toBe("app-id-1"); + expect(mockSend).toHaveBeenCalledTimes(1); + expect(mockSend.mock.calls[0][0]).toBeInstanceOf(GetObjectCommand); + }); + + it("throws for unknown clientId", async () => { + mockSend.mockResolvedValue({ + Body: mockBody(JSON.stringify({ "client-1": "app-id-1" })), + }); + + await expect(getApplicationId("unknown")).rejects.toThrow( + "No applicationId found for clientId 'unknown' in applications map", + ); + }); + + it("surfaces S3 SDK errors", async () => { + mockSend.mockRejectedValue(new Error("S3 unavailable")); + + await expect(getApplicationId("client-1")).rejects.toThrow( + "S3 unavailable", + ); + }); + + it("throws when env vars are not set", async () => { + let getFn: typeof getApplicationId; + const savedBucket = process.env.APPLICATIONS_MAP_S3_BUCKET; + const savedKey = process.env.APPLICATIONS_MAP_S3_KEY; + 
delete process.env.APPLICATIONS_MAP_S3_BUCKET; + delete process.env.APPLICATIONS_MAP_S3_KEY; + + jest.isolateModules(() => { + // eslint-disable-next-line @typescript-eslint/no-require-imports -- jest.isolateModules requires synchronous require + getFn = require("services/applications-map").getApplicationId; + }); + + await expect(getFn!("client-1")).rejects.toThrow( + "APPLICATIONS_MAP_S3_BUCKET and APPLICATIONS_MAP_S3_KEY are required", + ); + + process.env.APPLICATIONS_MAP_S3_BUCKET = savedBucket; + process.env.APPLICATIONS_MAP_S3_KEY = savedKey; + }); + + it("throws when S3 object body is empty", async () => { + mockSend.mockResolvedValue({ Body: undefined }); + + await expect(getApplicationId("client-1")).rejects.toThrow("is empty"); + }); + + it("throws when S3 object contains invalid JSON", async () => { + mockSend.mockResolvedValue({ + Body: mockBody("not-json"), + }); + + await expect(getApplicationId("client-1")).rejects.toThrow( + "contains invalid JSON", + ); + }); + + it("caches the applications map between calls", async () => { + mockSend.mockResolvedValue({ + Body: mockBody(JSON.stringify({ "client-1": "app-id-1" })), + }); + + await getApplicationId("client-1"); + await getApplicationId("client-1"); + + expect(mockSend).toHaveBeenCalledTimes(1); + }); +}); diff --git a/lambdas/https-client-lambda/src/__tests__/handler.test.ts b/lambdas/https-client-lambda/src/__tests__/handler.test.ts index f337f7c7..2d48d865 100644 --- a/lambdas/https-client-lambda/src/__tests__/handler.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/handler.test.ts @@ -20,7 +20,7 @@ jest.mock("services/config-loader", () => ({ })); const mockGetApplicationId = jest.fn(); -jest.mock("services/ssm-applications-map", () => ({ +jest.mock("services/applications-map", () => ({ getApplicationId: (...args: unknown[]) => mockGetApplicationId(...args), })); diff --git a/lambdas/https-client-lambda/src/__tests__/ssm-applications-map.test.ts 
b/lambdas/https-client-lambda/src/__tests__/ssm-applications-map.test.ts deleted file mode 100644 index 059023d1..00000000 --- a/lambdas/https-client-lambda/src/__tests__/ssm-applications-map.test.ts +++ /dev/null @@ -1,117 +0,0 @@ -import { GetParameterCommand } from "@aws-sdk/client-ssm"; - -import { getApplicationId, resetCache } from "services/ssm-applications-map"; - -const mockSend = jest.fn(); -jest.mock("@aws-sdk/client-ssm", () => { - const actual = jest.requireActual("@aws-sdk/client-ssm"); - return { - ...actual, - SSMClient: jest.fn().mockImplementation(() => ({ - send: (...args: unknown[]) => mockSend(...args), - })), - }; -}); - -jest.mock("@nhs-notify-client-callbacks/logger", () => ({ - logger: { - info: jest.fn(), - warn: jest.fn(), - error: jest.fn(), - debug: jest.fn(), - }, -})); - -process.env.APPLICATIONS_MAP_PARAMETER = "/test/applications-map"; - -describe("getApplicationId", () => { - beforeEach(() => { - mockSend.mockReset(); - resetCache(); - }); - - it("returns correct applicationId for a known clientId", async () => { - mockSend.mockResolvedValue({ - Parameter: { - Value: JSON.stringify({ - "client-1": "app-id-1", - "client-2": "app-id-2", - }), - }, - }); - - const result = await getApplicationId("client-1"); - - expect(result).toBe("app-id-1"); - expect(mockSend).toHaveBeenCalledTimes(1); - expect(mockSend.mock.calls[0][0]).toBeInstanceOf(GetParameterCommand); - }); - - it("throws for unknown clientId", async () => { - mockSend.mockResolvedValue({ - Parameter: { - Value: JSON.stringify({ "client-1": "app-id-1" }), - }, - }); - - await expect(getApplicationId("unknown")).rejects.toThrow( - "No applicationId found for clientId 'unknown' in SSM map", - ); - }); - - it("surfaces SSM SDK errors", async () => { - mockSend.mockRejectedValue(new Error("SSM unavailable")); - - await expect(getApplicationId("client-1")).rejects.toThrow( - "SSM unavailable", - ); - }); - - it("throws when APPLICATIONS_MAP_PARAMETER is not set", async () => { - 
let getFn: typeof getApplicationId; - const saved = process.env.APPLICATIONS_MAP_PARAMETER; - delete process.env.APPLICATIONS_MAP_PARAMETER; - - jest.isolateModules(() => { - // eslint-disable-next-line @typescript-eslint/no-require-imports -- jest.isolateModules requires synchronous require - getFn = require("services/ssm-applications-map").getApplicationId; - }); - - await expect(getFn!("client-1")).rejects.toThrow( - "APPLICATIONS_MAP_PARAMETER is required", - ); - - process.env.APPLICATIONS_MAP_PARAMETER = saved; - }); - - it("throws when SSM parameter value is empty", async () => { - mockSend.mockResolvedValue({ Parameter: { Value: undefined } }); - - await expect(getApplicationId("client-1")).rejects.toThrow( - "not found or has no value", - ); - }); - - it("throws when SSM parameter contains invalid JSON", async () => { - mockSend.mockResolvedValue({ - Parameter: { Value: "not-json" }, - }); - - await expect(getApplicationId("client-1")).rejects.toThrow( - "contains invalid JSON", - ); - }); - - it("caches the applications map between calls", async () => { - mockSend.mockResolvedValue({ - Parameter: { - Value: JSON.stringify({ "client-1": "app-id-1" }), - }, - }); - - await getApplicationId("client-1"); - await getApplicationId("client-1"); - - expect(mockSend).toHaveBeenCalledTimes(1); - }); -}); diff --git a/lambdas/https-client-lambda/src/handler.ts b/lambdas/https-client-lambda/src/handler.ts index 4c195ed5..9e02b372 100644 --- a/lambdas/https-client-lambda/src/handler.ts +++ b/lambdas/https-client-lambda/src/handler.ts @@ -3,7 +3,7 @@ import type { ClientCallbackPayload } from "@nhs-notify-client-callbacks/models" import pMap from "p-map"; import { logger } from "@nhs-notify-client-callbacks/logger"; import { loadTargetConfig } from "services/config-loader"; -import { getApplicationId } from "services/ssm-applications-map"; +import { getApplicationId } from "services/applications-map"; import { signPayload } from "services/payload-signer"; import { 
buildAgent } from "services/delivery/tls-agent-factory"; import { diff --git a/lambdas/https-client-lambda/src/services/applications-map.ts b/lambdas/https-client-lambda/src/services/applications-map.ts new file mode 100644 index 00000000..9a86d86c --- /dev/null +++ b/lambdas/https-client-lambda/src/services/applications-map.ts @@ -0,0 +1,73 @@ +import { GetObjectCommand, S3Client } from "@aws-sdk/client-s3"; +import { logger } from "@nhs-notify-client-callbacks/logger"; + +const s3Client = new S3Client({}); + +const DEFAULT_CACHE_TTL_MS = 300_000; // 5 minutes + +let cachedMap: Map | undefined; +let cacheExpiresAt = 0; + +async function loadMap(): Promise> { + if (cachedMap && Date.now() < cacheExpiresAt) { + return cachedMap; + } + + const { APPLICATIONS_MAP_S3_BUCKET, APPLICATIONS_MAP_S3_KEY } = process.env; + if (!APPLICATIONS_MAP_S3_BUCKET || !APPLICATIONS_MAP_S3_KEY) { + throw new Error( + "APPLICATIONS_MAP_S3_BUCKET and APPLICATIONS_MAP_S3_KEY are required", + ); + } + + const response = await s3Client.send( + new GetObjectCommand({ + Bucket: APPLICATIONS_MAP_S3_BUCKET, + Key: APPLICATIONS_MAP_S3_KEY, + }), + ); + + const body = await response.Body?.transformToString(); + if (!body) { + throw new Error( + `S3 object 's3://${APPLICATIONS_MAP_S3_BUCKET}/${APPLICATIONS_MAP_S3_KEY}' is empty`, + ); + } + + let parsed: Record; + try { + parsed = JSON.parse(body) as Record; + } catch { + throw new Error( + `S3 object 's3://${APPLICATIONS_MAP_S3_BUCKET}/${APPLICATIONS_MAP_S3_KEY}' contains invalid JSON`, + ); + } + + cachedMap = new Map(Object.entries(parsed)); + const ttlMs = + Number(process.env.APPLICATIONS_MAP_CACHE_TTL_MS) || DEFAULT_CACHE_TTL_MS; + cacheExpiresAt = Date.now() + ttlMs; + logger.info("Applications map loaded from S3", { + bucket: APPLICATIONS_MAP_S3_BUCKET, + key: APPLICATIONS_MAP_S3_KEY, + }); + return cachedMap; +} + +export async function getApplicationId(clientId: string): Promise { + const map = await loadMap(); + const applicationId = 
map.get(clientId); + + if (!applicationId) { + throw new Error( + `No applicationId found for clientId '${clientId}' in applications map`, + ); + } + + return applicationId; +} + +export function resetCache(): void { + cachedMap = undefined; + cacheExpiresAt = 0; +} diff --git a/lambdas/https-client-lambda/src/services/ssm-applications-map.ts b/lambdas/https-client-lambda/src/services/ssm-applications-map.ts deleted file mode 100644 index 999c23d9..00000000 --- a/lambdas/https-client-lambda/src/services/ssm-applications-map.ts +++ /dev/null @@ -1,69 +0,0 @@ -import { GetParameterCommand, SSMClient } from "@aws-sdk/client-ssm"; -import { logger } from "@nhs-notify-client-callbacks/logger"; - -const ssmClient = new SSMClient({}); - -const DEFAULT_CACHE_TTL_MS = 300_000; // 5 minutes - -let cachedMap: Map | undefined; -let cacheExpiresAt = 0; - -async function loadMap(): Promise> { - if (cachedMap && Date.now() < cacheExpiresAt) { - return cachedMap; - } - - const { APPLICATIONS_MAP_PARAMETER } = process.env; - if (!APPLICATIONS_MAP_PARAMETER) { - throw new Error("APPLICATIONS_MAP_PARAMETER is required"); - } - - const response = await ssmClient.send( - new GetParameterCommand({ - Name: APPLICATIONS_MAP_PARAMETER, - WithDecryption: true, - }), - ); - - if (!response.Parameter?.Value) { - throw new Error( - `SSM parameter '${APPLICATIONS_MAP_PARAMETER}' not found or has no value`, - ); - } - - let parsed: Record; - try { - parsed = JSON.parse(response.Parameter.Value) as Record; - } catch { - throw new Error( - `SSM parameter '${APPLICATIONS_MAP_PARAMETER}' contains invalid JSON`, - ); - } - - cachedMap = new Map(Object.entries(parsed)); - const ttlMs = - Number(process.env.APPLICATIONS_MAP_CACHE_TTL_MS) || DEFAULT_CACHE_TTL_MS; - cacheExpiresAt = Date.now() + ttlMs; - logger.info("Applications map loaded from SSM", { - parameterName: APPLICATIONS_MAP_PARAMETER, - }); - return cachedMap; -} - -export async function getApplicationId(clientId: string): Promise { - const 
map = await loadMap(); - const applicationId = map.get(clientId); - - if (!applicationId) { - throw new Error( - `No applicationId found for clientId '${clientId}' in SSM map`, - ); - } - - return applicationId; -} - -export function resetCache(): void { - cachedMap = undefined; - cacheExpiresAt = 0; -} diff --git a/lambdas/perf-runner-lambda/src/__tests__/cloudwatch.test.ts b/lambdas/perf-runner-lambda/src/__tests__/cloudwatch.test.ts index 526de638..a6f4d36c 100644 --- a/lambdas/perf-runner-lambda/src/__tests__/cloudwatch.test.ts +++ b/lambdas/perf-runner-lambda/src/__tests__/cloudwatch.test.ts @@ -24,7 +24,7 @@ describe("queryMetricsSnapshot", () => { const result = await queryMetricsSnapshot( mockCloudWatchClient, - "/aws/lambda/nhs-dev-callbacks-client-transform-filter", + "/aws/lambda/nhs-dev-cb-client-transform-filter", 1_700_000_000, 1_700_000_060, ); diff --git a/lambdas/perf-runner-lambda/src/__tests__/index.test.ts b/lambdas/perf-runner-lambda/src/__tests__/index.test.ts index b1b5687e..4c99f195 100644 --- a/lambdas/perf-runner-lambda/src/__tests__/index.test.ts +++ b/lambdas/perf-runner-lambda/src/__tests__/index.test.ts @@ -51,12 +51,13 @@ beforeEach(() => { jest.clearAllMocks(); mockRunPerformanceTest.mockResolvedValue(mockResult); process.env.INBOUND_QUEUE_URL = "https://sqs.example.invalid/queue"; + process.env.DELIVERY_QUEUE_URL_PREFIX = + "https://sqs.example.invalid/nhs-dev-cbc-"; process.env.TRANSFORM_FILTER_LOG_GROUP = - "/aws/lambda/nhs-dev-callbacks-client-transform-filter"; + "/aws/lambda/nhs-dev-cb-client-transform-filter"; process.env.DELIVERY_LOG_GROUP_PREFIX = - "/aws/lambda/nhs-dev-callbacks-https-client-"; - process.env.MOCK_WEBHOOK_LOG_GROUP = - "/aws/lambda/nhs-dev-callbacks-mock-webhook"; + "/aws/lambda/nhs-dev-cbc-https-client-"; + process.env.MOCK_WEBHOOK_LOG_GROUP = "/aws/lambda/nhs-dev-cb-mock-webhook"; process.env.ELASTICACHE_ENDPOINT = "cache.example.invalid"; process.env.ELASTICACHE_CACHE_NAME = "test-cache"; 
process.env.ELASTICACHE_IAM_USERNAME = "test-user"; @@ -71,9 +72,10 @@ describe("handler", () => { expect(mockRunPerformanceTest).toHaveBeenCalledWith( expect.objectContaining({ queueUrl: "https://sqs.example.invalid/queue", - logGroupName: "/aws/lambda/nhs-dev-callbacks-client-transform-filter", - deliveryLogGroupPrefix: "/aws/lambda/nhs-dev-callbacks-https-client-", - mockWebhookLogGroup: "/aws/lambda/nhs-dev-callbacks-mock-webhook", + deliveryQueueUrlPrefix: "https://sqs.example.invalid/nhs-dev-cbc-", + logGroupName: "/aws/lambda/nhs-dev-cb-client-transform-filter", + deliveryLogGroupPrefix: "/aws/lambda/nhs-dev-cbc-https-client-", + mockWebhookLogGroup: "/aws/lambda/nhs-dev-cb-mock-webhook", }), testScenario, "test-id", @@ -183,7 +185,7 @@ describe("handler", () => { expect(mockRunPerformanceTest).toHaveBeenCalledWith( expect.objectContaining({ - mockWebhookLogGroup: "/aws/lambda/nhs-dev-callbacks-mock-webhook", + mockWebhookLogGroup: "/aws/lambda/nhs-dev-cb-mock-webhook", }), expect.anything(), "webhook-test", diff --git a/lambdas/perf-runner-lambda/src/__tests__/purge.test.ts b/lambdas/perf-runner-lambda/src/__tests__/purge.test.ts index 52832910..3f70037b 100644 --- a/lambdas/perf-runner-lambda/src/__tests__/purge.test.ts +++ b/lambdas/perf-runner-lambda/src/__tests__/purge.test.ts @@ -22,19 +22,39 @@ const scenario: Scenario = { }; const inboundQueueUrl = - "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-inbound-event-queue"; + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-cb-inbound-event-queue"; + +const deliveryQueueUrlPrefix = + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-cbc-"; describe("deriveQueueUrls", () => { it("derives all queue URLs from the inbound queue URL and scenario", () => { + const urls = deriveQueueUrls( + inboundQueueUrl, + scenario, + deliveryQueueUrlPrefix, + ); + + expect(urls).toEqual([ + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-cb-inbound-event-queue", + 
"https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-cb-inbound-event-dlq", + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-cbc-perf-client-1-delivery-queue", + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-cbc-perf-client-1-delivery-dlq-queue", + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-cbc-perf-client-2-delivery-queue", + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-cbc-perf-client-2-delivery-dlq-queue", + ]); + }); + + it("falls back to inbound base URL when no delivery prefix provided", () => { const urls = deriveQueueUrls(inboundQueueUrl, scenario); expect(urls).toEqual([ - "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-inbound-event-queue", - "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-inbound-event-dlq", - "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-perf-client-1-delivery-queue", - "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-perf-client-1-delivery-dlq-queue", - "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-perf-client-2-delivery-queue", - "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-perf-client-2-delivery-dlq-queue", + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-cb-inbound-event-queue", + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-cb-inbound-event-dlq", + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-cb-perf-client-1-delivery-queue", + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-cb-perf-client-1-delivery-dlq-queue", + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-cb-perf-client-2-delivery-queue", + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-cb-perf-client-2-delivery-dlq-queue", ]); }); @@ -57,13 +77,17 @@ describe("deriveQueueUrls", () => { ], }; - const urls = deriveQueueUrls(inboundQueueUrl, duplicateScenario); + const urls = deriveQueueUrls( + inboundQueueUrl, + duplicateScenario, + deliveryQueueUrlPrefix, + ); 
expect(urls).toEqual([ - "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-inbound-event-queue", - "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-inbound-event-dlq", - "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-perf-client-1-delivery-queue", - "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-callbacks-perf-client-1-delivery-dlq-queue", + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-cb-inbound-event-queue", + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-cb-inbound-event-dlq", + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-cbc-perf-client-1-delivery-queue", + "https://sqs.eu-west-2.amazonaws.com/123456789/nhs-dev-cbc-perf-client-1-delivery-dlq-queue", ]); }); }); diff --git a/lambdas/perf-runner-lambda/src/__tests__/runner.test.ts b/lambdas/perf-runner-lambda/src/__tests__/runner.test.ts index 9d7acfe5..89933812 100644 --- a/lambdas/perf-runner-lambda/src/__tests__/runner.test.ts +++ b/lambdas/perf-runner-lambda/src/__tests__/runner.test.ts @@ -101,8 +101,9 @@ const deps: RunnerDeps = { sqsClient: {} as SQSClient, cloudWatchClient: {} as CloudWatchLogsClient, queueUrl: "https://sqs.example.invalid/queue", - logGroupName: "/aws/lambda/nhs-dev-callbacks-client-transform-filter", - deliveryLogGroupPrefix: "/aws/lambda/nhs-dev-callbacks-https-client-", + deliveryQueueUrlPrefix: "https://sqs.example.invalid/nhs-dev-cbc-", + logGroupName: "/aws/lambda/nhs-dev-cb-client-transform-filter", + deliveryLogGroupPrefix: "/aws/lambda/nhs-dev-cbc-https-client-", }; beforeEach(() => { @@ -368,8 +369,8 @@ describe("runPerformanceTest", () => { expect(mockQueryDeliveryMetricsSnapshot).toHaveBeenCalledWith( deps.cloudWatchClient, expect.arrayContaining([ - "/aws/lambda/nhs-dev-callbacks-https-client-perf-client-1", - "/aws/lambda/nhs-dev-callbacks-https-client-perf-client-2", + "/aws/lambda/nhs-dev-cbc-https-client-perf-client-1", + "/aws/lambda/nhs-dev-cbc-https-client-perf-client-2", ]), 
expect.any(Number), expect.any(Number), @@ -495,13 +496,13 @@ describe("runPerformanceTest", () => { expect(mockQueryPerClientRateTimeline).toHaveBeenCalledTimes(2); expect(mockQueryPerClientRateTimeline).toHaveBeenCalledWith( deps.cloudWatchClient, - "/aws/lambda/nhs-dev-callbacks-https-client-perf-client-1", + "/aws/lambda/nhs-dev-cbc-https-client-perf-client-1", expect.any(Number), expect.any(Number), ); expect(mockQueryPerClientRateTimeline).toHaveBeenCalledWith( deps.cloudWatchClient, - "/aws/lambda/nhs-dev-callbacks-https-client-perf-client-2", + "/aws/lambda/nhs-dev-cbc-https-client-perf-client-2", expect.any(Number), expect.any(Number), ); @@ -566,7 +567,11 @@ describe("runPerformanceTest", () => { await runPerformanceTest(deps, scenario, "test-purge", immediateSleep); - expect(mockDeriveQueueUrls).toHaveBeenCalledWith(deps.queueUrl, scenario); + expect(mockDeriveQueueUrls).toHaveBeenCalledWith( + deps.queueUrl, + scenario, + deps.deliveryQueueUrlPrefix, + ); expect(mockPurgeQueues).toHaveBeenCalledTimes(2); }); diff --git a/lambdas/perf-runner-lambda/src/index.ts b/lambdas/perf-runner-lambda/src/index.ts index 2150241e..12def2d6 100644 --- a/lambdas/perf-runner-lambda/src/index.ts +++ b/lambdas/perf-runner-lambda/src/index.ts @@ -17,6 +17,7 @@ export async function handler( const region = process.env.AWS_REGION ?? 
"eu-west-2"; const queueUrl = process.env.INBOUND_QUEUE_URL; + const deliveryQueueUrlPrefix = process.env.DELIVERY_QUEUE_URL_PREFIX; const logGroupName = process.env.TRANSFORM_FILTER_LOG_GROUP; const deliveryLogGroupPrefix = process.env.DELIVERY_LOG_GROUP_PREFIX; const mockWebhookLogGroup = process.env.MOCK_WEBHOOK_LOG_GROUP; @@ -55,6 +56,7 @@ export async function handler( sqsClient, cloudWatchClient, queueUrl, + deliveryQueueUrlPrefix, logGroupName, deliveryLogGroupPrefix, mockWebhookLogGroup, diff --git a/lambdas/perf-runner-lambda/src/purge.ts b/lambdas/perf-runner-lambda/src/purge.ts index 3f7cb097..40de78e5 100644 --- a/lambdas/perf-runner-lambda/src/purge.ts +++ b/lambdas/perf-runner-lambda/src/purge.ts @@ -4,17 +4,19 @@ import type { Scenario } from "types"; export function deriveQueueUrls( inboundQueueUrl: string, scenario: Scenario, + deliveryQueueUrlPrefix?: string, ): string[] { // eslint-disable-next-line sonarjs/null-dereference -- String.replace always returns a string - const baseUrl = inboundQueueUrl.replace(/inbound-event-queue$/, ""); + const inboundBaseUrl = inboundQueueUrl.replace(/inbound-event-queue$/, ""); + const deliveryBaseUrl = deliveryQueueUrlPrefix ?? 
inboundBaseUrl; const clientIds = [...new Set(scenario.eventMix.map((e) => e.clientId))]; return [ inboundQueueUrl, - `${baseUrl}inbound-event-dlq`, + `${inboundBaseUrl}inbound-event-dlq`, ...clientIds.flatMap((id) => [ - `${baseUrl}${id}-delivery-queue`, - `${baseUrl}${id}-delivery-dlq-queue`, + `${deliveryBaseUrl}${id}-delivery-queue`, + `${deliveryBaseUrl}${id}-delivery-dlq-queue`, ]), ]; } diff --git a/lambdas/perf-runner-lambda/src/runner.ts b/lambdas/perf-runner-lambda/src/runner.ts index 86ccd435..c2726987 100644 --- a/lambdas/perf-runner-lambda/src/runner.ts +++ b/lambdas/perf-runner-lambda/src/runner.ts @@ -155,7 +155,11 @@ export async function runPerformanceTest( const testStartMs = Date.now(); - const queueUrls = deriveQueueUrls(deps.queueUrl, scenario); + const queueUrls = deriveQueueUrls( + deps.queueUrl, + scenario, + deps.deliveryQueueUrlPrefix, + ); if (skipPurge) { logger.info("Skipping queue purge", { queueUrls }); diff --git a/lambdas/perf-runner-lambda/src/types.ts b/lambdas/perf-runner-lambda/src/types.ts index 8820f5dc..3634a36e 100644 --- a/lambdas/perf-runner-lambda/src/types.ts +++ b/lambdas/perf-runner-lambda/src/types.ts @@ -137,6 +137,7 @@ export type RunnerDeps = { sqsClient: SQSClient; cloudWatchClient: CloudWatchLogsClient; queueUrl: string; + deliveryQueueUrlPrefix?: string; logGroupName: string; deliveryLogGroupPrefix?: string; mockWebhookLogGroup?: string; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 31de1237..f32143dd 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -55,9 +55,6 @@ catalogs: '@aws-sdk/client-sqs': specifier: ^3.1023.0 version: 3.1026.0 - '@aws-sdk/client-ssm': - specifier: ^3.1025.0 - version: 3.1029.0 '@aws-sdk/client-sts': specifier: ^3.1023.0 version: 3.1026.0 @@ -67,6 +64,9 @@ catalogs: '@smithy/signature-v4': specifier: ^5.0.0 version: 5.3.13 + '@smithy/types': + specifier: ^4.3.1 + version: 4.14.0 lint: '@eslint/js': specifier: ^9.39.4 @@ -340,9 +340,6 @@ importers: '@aws-sdk/client-sqs': specifier: 
catalog:aws version: 3.1026.0 - '@aws-sdk/client-ssm': - specifier: catalog:aws - version: 3.1029.0 '@aws-sdk/credential-providers': specifier: catalog:aws version: 3.1026.0 @@ -374,6 +371,9 @@ importers: specifier: catalog:app version: 4.0.0 devDependencies: + '@smithy/types': + specifier: catalog:aws + version: 4.14.0 '@tsconfig/node22': specifier: catalog:tools version: 22.0.5 @@ -660,9 +660,6 @@ importers: '@aws-sdk/client-s3': specifier: catalog:aws version: 3.1029.0 - '@aws-sdk/client-ssm': - specifier: catalog:aws - version: 3.1029.0 '@aws-sdk/client-sts': specifier: catalog:aws version: 3.1026.0 @@ -685,6 +682,9 @@ importers: specifier: catalog:app version: 4.3.6 devDependencies: + '@smithy/types': + specifier: catalog:aws + version: 4.14.0 '@types/jest': specifier: catalog:test version: 30.0.0 @@ -755,10 +755,6 @@ packages: resolution: {integrity: sha512-b7z2WI1tqObk4U7vUbmBfXIeFhxKbFr7xQ4rWi879iFl5aSPvpd1WAmLi6z1boVKTEwEqHALuE5MyGBHhOCy5A==} engines: {node: '>=20.0.0'} - '@aws-sdk/client-ssm@3.1029.0': - resolution: {integrity: sha512-LthC1Dkh7r4ihZ7EI+6Sms9Ml0XQXoBZbw5LmtT1EJElriMugAfMnG5pKzDAcWpLiZgVBSZVai7moQR/QM/cCw==} - engines: {node: '>=20.0.0'} - '@aws-sdk/client-sts@3.1026.0': resolution: {integrity: sha512-kyqU8QMroxh6vc22cLWRT/wk5I142PiwGpGosnqJ36mLmiLtn84HuDYyivaNRAjKWIUQNlWeB0HHSoeqbn2O6Q==} engines: {node: '>=20.0.0'} @@ -4924,51 +4920,6 @@ snapshots: transitivePeerDependencies: - aws-crt - '@aws-sdk/client-ssm@3.1029.0': - dependencies: - '@aws-crypto/sha256-browser': 5.2.0 - '@aws-crypto/sha256-js': 5.2.0 - '@aws-sdk/core': 3.973.27 - '@aws-sdk/credential-provider-node': 3.972.30 - '@aws-sdk/middleware-host-header': 3.972.9 - '@aws-sdk/middleware-logger': 3.972.9 - '@aws-sdk/middleware-recursion-detection': 3.972.10 - '@aws-sdk/middleware-user-agent': 3.972.29 - '@aws-sdk/region-config-resolver': 3.972.11 - '@aws-sdk/types': 3.973.7 - '@aws-sdk/util-endpoints': 3.996.6 - '@aws-sdk/util-user-agent-browser': 3.972.9 - 
'@aws-sdk/util-user-agent-node': 3.973.15 - '@smithy/config-resolver': 4.4.14 - '@smithy/core': 3.23.14 - '@smithy/fetch-http-handler': 5.3.16 - '@smithy/hash-node': 4.2.13 - '@smithy/invalid-dependency': 4.2.13 - '@smithy/middleware-content-length': 4.2.13 - '@smithy/middleware-endpoint': 4.4.29 - '@smithy/middleware-retry': 4.5.0 - '@smithy/middleware-serde': 4.2.17 - '@smithy/middleware-stack': 4.2.13 - '@smithy/node-config-provider': 4.3.13 - '@smithy/node-http-handler': 4.5.2 - '@smithy/protocol-http': 5.3.13 - '@smithy/smithy-client': 4.12.9 - '@smithy/types': 4.14.0 - '@smithy/url-parser': 4.2.13 - '@smithy/util-base64': 4.3.2 - '@smithy/util-body-length-browser': 4.2.2 - '@smithy/util-body-length-node': 4.2.3 - '@smithy/util-defaults-mode-browser': 4.3.45 - '@smithy/util-defaults-mode-node': 4.2.49 - '@smithy/util-endpoints': 3.3.4 - '@smithy/util-middleware': 4.2.13 - '@smithy/util-retry': 4.3.0 - '@smithy/util-utf8': 4.2.2 - '@smithy/util-waiter': 4.2.15 - tslib: 2.8.1 - transitivePeerDependencies: - - aws-crt - '@aws-sdk/client-sts@3.1026.0': dependencies: '@aws-crypto/sha256-browser': 5.2.0 diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 3e9e890d..f138cdcc 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -31,11 +31,11 @@ catalogs: "@aws-sdk/client-cloudwatch-logs": "^3.1023.0" "@aws-sdk/client-s3": "^3.1024.0" "@aws-sdk/client-sqs": "^3.1023.0" - "@aws-sdk/client-ssm": "^3.1025.0" "@aws-crypto/sha256-js": "^5.2.0" "@aws-sdk/client-sts": "^3.1023.0" "@aws-sdk/credential-providers": "^3.1023.0" "@smithy/signature-v4": "^5.0.0" + "@smithy/types": "^4.3.1" lint: "@eslint/js": "^9.39.4" "@stylistic/eslint-plugin": "^5.10.0" diff --git a/scripts/tests/integration-debug.sh b/scripts/tests/integration-debug.sh index d7892526..bfa51064 100755 --- a/scripts/tests/integration-debug.sh +++ b/scripts/tests/integration-debug.sh @@ -63,7 +63,8 @@ fi ACCOUNT_ID="$(aws sts get-caller-identity --profile "$AWS_PROFILE" --query Account --output 
text)" -PREFIX="nhs-${ENVIRONMENT}-callbacks" +PREFIX="nhs-${ENVIRONMENT}-cb" +CLIENT_PREFIX="nhs-${ENVIRONMENT}-cbc" PIPE_NAME="${PREFIX}-main" print_section() { @@ -106,8 +107,8 @@ show_queue_counts() { action_queue_status() { require_client_id - show_queue_counts "Client Delivery Queue - Message Counts" "${PREFIX}-${CLIENT_ID}-delivery-queue" - show_queue_counts "Client Delivery DLQ - Message Counts" "${PREFIX}-${CLIENT_ID}-delivery-dlq-queue" + show_queue_counts "Client Delivery Queue - Message Counts" "${CLIENT_PREFIX}-${CLIENT_ID}-delivery-queue" + show_queue_counts "Client Delivery DLQ - Message Counts" "${CLIENT_PREFIX}-${CLIENT_ID}-delivery-dlq-queue" show_queue_counts "Inbound Event Queue - Message Counts" "${PREFIX}-inbound-event-queue" show_queue_counts "Inbound Event DLQ - Message Counts" "${PREFIX}-inbound-event-dlq" } @@ -136,8 +137,8 @@ peek_queue_message() { action_queue_peek() { require_client_id - peek_queue_message "Client Delivery Queue - Message Peek" "${PREFIX}-${CLIENT_ID}-delivery-queue" - peek_queue_message "Client Delivery DLQ - Message Peek" "${PREFIX}-${CLIENT_ID}-delivery-dlq-queue" + peek_queue_message "Client Delivery Queue - Message Peek" "${CLIENT_PREFIX}-${CLIENT_ID}-delivery-queue" + peek_queue_message "Client Delivery DLQ - Message Peek" "${CLIENT_PREFIX}-${CLIENT_ID}-delivery-dlq-queue" peek_queue_message "Inbound Event Queue - Message Peek" "${PREFIX}-inbound-event-queue" peek_queue_message "Inbound Event DLQ - Message Peek" "${PREFIX}-inbound-event-dlq" return 0 @@ -185,7 +186,7 @@ action_tail_https_client() { print_section "HTTPS Client Lambda Logs" aws logs tail \ - "/aws/lambda/${PREFIX}-https-client-${CLIENT_ID}" \ + "/aws/lambda/${CLIENT_PREFIX}-https-client-${CLIENT_ID}" \ --region "$REGION" \ --profile "$AWS_PROFILE" \ --since "$LOG_SINCE" \ diff --git a/scripts/tests/integration-local.sh b/scripts/tests/integration-local.sh index 55605b2d..95280dcf 100755 --- a/scripts/tests/integration-local.sh +++ 
b/scripts/tests/integration-local.sh @@ -30,7 +30,7 @@ fi AWS_REGION="${AWS_REGION:-eu-west-2}" LOG_LEVEL="${LOG_LEVEL:-debug}" NODE_OPTIONS="${NODE_OPTIONS:---experimental-vm-modules}" -COMPONENT="callbacks" +COMPONENT="cb" PROJECT="nhs" if ! aws sts get-caller-identity --profile "$AWS_PROFILE" >/dev/null 2>&1; then diff --git a/tests/integration/delivery-resilience.test.ts b/tests/integration/delivery-resilience.test.ts index 062a3dcc..64d32018 100644 --- a/tests/integration/delivery-resilience.test.ts +++ b/tests/integration/delivery-resilience.test.ts @@ -117,7 +117,7 @@ describe("Delivery Resilience", () => { const { clientId } = getClientConfig("clientRateLimit"); dlqUrl = ctx.clientDlqUrl(clientId); deliveryUrl = ctx.clientDeliveryUrl(clientId); - httpsClientLogGroup = ctx.logGroup(`https-client-${clientId}`); + httpsClientLogGroup = ctx.clientLogGroup(`https-client-${clientId}`); await purgeQueues(ctx.sqs, [dlqUrl, deliveryUrl]); }); @@ -190,7 +190,7 @@ describe("Delivery Resilience", () => { const { clientId } = getClientConfig("clientCircuitBreaker"); dlqUrl = ctx.clientDlqUrl(clientId); deliveryUrl = ctx.clientDeliveryUrl(clientId); - httpsClientLogGroup = ctx.logGroup(`https-client-${clientId}`); + httpsClientLogGroup = ctx.clientLogGroup(`https-client-${clientId}`); await purgeQueues(ctx.sqs, [dlqUrl, deliveryUrl]); }); diff --git a/tests/integration/dlq-alarms.test.ts b/tests/integration/dlq-alarms.test.ts index c4f69fa8..ae1e1bff 100644 --- a/tests/integration/dlq-alarms.test.ts +++ b/tests/integration/dlq-alarms.test.ts @@ -13,10 +13,10 @@ import { import { buildMockClientDlqQueueUrl } from "./helpers/sqs"; function buildDlqDepthAlarmName( - { component, environment, project }: DeploymentDetails, + { clientComponent, environment, project }: DeploymentDetails, clientId: string, ): string { - return `${project}-${environment}-${component}-${clientId}-dlq-depth`; + return `${project}-${environment}-${clientComponent}-${clientId}-dlq-depth`; } function 
getQueueNameFromUrl(queueUrl: string): string { diff --git a/tests/integration/helpers/sqs.ts b/tests/integration/helpers/sqs.ts index 5cdcc3a9..b0d3f4ff 100644 --- a/tests/integration/helpers/sqs.ts +++ b/tests/integration/helpers/sqs.ts @@ -34,7 +34,8 @@ function buildReceiveMessageInput( } function buildQueueUrl( - { accountId, component, environment, project, region }: DeploymentDetails, + { accountId, environment, project, region }: DeploymentDetails, + component: string, name: string, options?: { appendQueueSuffix?: boolean }, ): string { @@ -49,14 +50,22 @@ export function buildMockClientDlqQueueUrl( deploymentDetails: DeploymentDetails, clientId: string, ): string { - return buildQueueUrl(deploymentDetails, `${clientId}-delivery-dlq`); + return buildQueueUrl( + deploymentDetails, + deploymentDetails.clientComponent, + `${clientId}-delivery-dlq`, + ); } export function buildMockClientDeliveryQueueUrl( deploymentDetails: DeploymentDetails, clientId: string, ): string { - return buildQueueUrl(deploymentDetails, `${clientId}-delivery`); + return buildQueueUrl( + deploymentDetails, + deploymentDetails.clientComponent, + `${clientId}-delivery`, + ); } export async function sendSqsEvent( diff --git a/tests/integration/helpers/test-context.ts b/tests/integration/helpers/test-context.ts index df5a31f5..8277f94b 100644 --- a/tests/integration/helpers/test-context.ts +++ b/tests/integration/helpers/test-context.ts @@ -25,6 +25,7 @@ export type TestContext = { clientDlqUrl(clientId: string): string; clientDeliveryUrl(clientId: string): string; logGroup(name: string): string; + clientLogGroup(name: string): string; }; export function createTestContext(): TestContext { @@ -43,6 +44,8 @@ export function createTestContext(): TestContext { clientDeliveryUrl: (clientId) => buildMockClientDeliveryQueueUrl(deployment, clientId), logGroup: (name) => buildLambdaLogGroupName(deployment, name), + clientLogGroup: (name) => + 
`/aws/lambda/${deployment.project}-${deployment.environment}-${deployment.clientComponent}-${name}`, }; } diff --git a/tests/integration/metrics.test.ts b/tests/integration/metrics.test.ts index 20e1dfb8..cd99588b 100644 --- a/tests/integration/metrics.test.ts +++ b/tests/integration/metrics.test.ts @@ -118,7 +118,7 @@ describe("Metrics", () => { beforeAll(() => { const { clientId } = getClientConfig("clientSingleTarget"); - httpsClientLogGroup = ctx.logGroup(`https-client-${clientId}`); + httpsClientLogGroup = ctx.clientLogGroup(`https-client-${clientId}`); }); it("should emit DeliveryAttempt, DeliverySuccess and DeliveryDurationMs on successful delivery", async () => { diff --git a/tests/test-support/helpers/deployment.ts b/tests/test-support/helpers/deployment.ts index 20bf1f59..dacb3c2d 100644 --- a/tests/test-support/helpers/deployment.ts +++ b/tests/test-support/helpers/deployment.ts @@ -3,6 +3,7 @@ export type DeploymentDetails = { environment: string; project: string; component: string; + clientComponent: string; accountId: string; }; @@ -10,7 +11,8 @@ export function getDeploymentDetails(): DeploymentDetails { const region = process.env.AWS_REGION ?? "eu-west-2"; const environment = process.env.ENVIRONMENT; const project = process.env.PROJECT ?? "nhs"; - const component = process.env.COMPONENT ?? "callbacks"; + const component = process.env.COMPONENT ?? "cb"; + const clientComponent = process.env.CLIENT_COMPONENT ?? 
"cbc"; const accountId = process.env.AWS_ACCOUNT_ID; if (!environment) { @@ -21,17 +23,14 @@ export function getDeploymentDetails(): DeploymentDetails { throw new Error("AWS_ACCOUNT_ID environment variable must be set"); } - return { region, environment, project, component, accountId }; -} - -export function buildSubscriptionConfigBucketName({ - accountId, - component, - environment, - project, - region, -}: DeploymentDetails): string { - return `${project}-${accountId}-${region}-${environment}-${component}-subscription-config`; + return { + region, + environment, + project, + component, + clientComponent, + accountId, + }; } export function buildLambdaLogGroupName( diff --git a/tools/client-subscriptions-management/package.json b/tools/client-subscriptions-management/package.json index 4d934470..c2e3ebc4 100644 --- a/tools/client-subscriptions-management/package.json +++ b/tools/client-subscriptions-management/package.json @@ -25,7 +25,6 @@ }, "dependencies": { "@aws-sdk/client-s3": "catalog:aws", - "@aws-sdk/client-ssm": "catalog:aws", "@aws-sdk/client-sts": "catalog:aws", "@aws-sdk/credential-providers": "catalog:aws", "@nhs-notify-client-callbacks/models": "workspace:*", @@ -35,6 +34,7 @@ "zod": "catalog:app" }, "devDependencies": { + "@smithy/types": "catalog:aws", "@types/jest": "catalog:test", "@types/node": "catalog:tools", "@types/yargs": "catalog:tools", diff --git a/tools/client-subscriptions-management/src/__tests__/aws.test.ts b/tools/client-subscriptions-management/src/__tests__/aws.test.ts index f08d0bda..d1af5109 100644 --- a/tools/client-subscriptions-management/src/__tests__/aws.test.ts +++ b/tools/client-subscriptions-management/src/__tests__/aws.test.ts @@ -1,8 +1,10 @@ import { + deriveApplicationsMapBucketName, + deriveApplicationsMapKey, deriveBucketName, - deriveParameterName, + resolveApplicationsMapLocation, resolveBucketName, - resolveParameterName, + resolveDeploymentContext, resolveProfile, resolveRegion, } from "src/aws"; @@ -24,20 
+26,18 @@ describe("aws", () => { it("derives bucket name from environment using STS account ID", async () => { await expect( resolveBucketName({ environment: "dev", region: "eu-west-2" }), - ).resolves.toBe( - "nhs-123456789012-eu-west-2-dev-callbacks-subscription-config", - ); + ).resolves.toBe("nhs-123456789012-eu-west-2-main-acct-clie-client-configs"); }); it("uses default region eu-west-2 when region is not provided", async () => { await expect(resolveBucketName({ environment: "dev" })).resolves.toBe( - "nhs-123456789012-eu-west-2-dev-callbacks-subscription-config", + "nhs-123456789012-eu-west-2-main-acct-clie-client-configs", ); }); it("derives bucket name correctly", () => { expect(deriveBucketName("123456789012", "dev", "eu-west-2")).toBe( - "nhs-123456789012-eu-west-2-dev-callbacks-subscription-config", + "nhs-123456789012-eu-west-2-main-acct-clie-client-configs", ); }); @@ -81,37 +81,76 @@ describe("aws", () => { expect(resolveRegion(undefined, {} as NodeJS.ProcessEnv)).toBeUndefined(); }); - it("derives parameter name from environment", () => { - expect(deriveParameterName("dev")).toBe( - "/nhs/dev/callbacks/applications-map", + it("derives applications map bucket name", () => { + expect(deriveApplicationsMapBucketName("123456789012", "eu-west-2")).toBe( + "nhs-123456789012-eu-west-2-main-acct-clie-apps-map", ); }); - it("resolves parameter name from explicit argument", () => { - expect(resolveParameterName({ parameterName: "/custom/path" })).toBe( - "/custom/path", - ); + it("derives applications map key from environment", () => { + expect(deriveApplicationsMapKey("dev")).toBe("dev/applications-map.json"); }); - it("derives parameter name from environment argument", () => { - expect(resolveParameterName({ environment: "dev" })).toBe( - "/nhs/dev/callbacks/applications-map", - ); + it("resolves applications map location from explicit arguments", async () => { + await expect( + resolveApplicationsMapLocation({ + bucketName: "my-bucket", + key: 
"my-key.json", + }), + ).resolves.toEqual({ bucket: "my-bucket", key: "my-key.json" }); }); - it("derives parameter name from ENVIRONMENT env var", () => { - expect( - resolveParameterName({ - env: { ENVIRONMENT: "staging" } as NodeJS.ProcessEnv, + it("derives applications map location from environment", async () => { + await expect( + resolveApplicationsMapLocation({ + environment: "dev", + region: "eu-west-2", }), - ).toBe("/nhs/staging/callbacks/applications-map"); + ).resolves.toEqual({ + bucket: "nhs-123456789012-eu-west-2-main-acct-clie-apps-map", + key: "dev/applications-map.json", + }); }); - it("throws when no parameter name can be resolved", () => { - expect(() => - resolveParameterName({ env: {} as NodeJS.ProcessEnv }), - ).toThrow( - "Environment is required to derive parameter name. Please provide via --environment or ENVIRONMENT env var.", + it("throws when no environment for applications map location", async () => { + await expect( + resolveApplicationsMapLocation({ + env: {} as NodeJS.ProcessEnv, + } as Parameters[0]), + ).rejects.toThrow("Environment is required"); + }); + + it("resolves deployment context with all fields", async () => { + const ctx = await resolveDeploymentContext({ + environment: "dev", + region: "eu-west-2", + profile: "my-profile", + }); + + expect(ctx).toEqual({ + environment: "dev", + region: "eu-west-2", + accountId: "123456789012", + profile: "my-profile", + }); + }); + + it("resolves deployment context with defaults", async () => { + const ctx = await resolveDeploymentContext({ + environment: "staging", + }); + + expect(ctx).toEqual({ + environment: "staging", + region: "eu-west-2", + accountId: "123456789012", + profile: undefined, + }); + }); + + it("throws when environment is missing from deployment context", async () => { + await expect(resolveDeploymentContext({})).rejects.toThrow( + "Environment is required", ); }); }); diff --git a/tools/client-subscriptions-management/src/__tests__/container.test.ts 
b/tools/client-subscriptions-management/src/__tests__/container.test.ts index 1838175f..7066307e 100644 --- a/tools/client-subscriptions-management/src/__tests__/container.test.ts +++ b/tools/client-subscriptions-management/src/__tests__/container.test.ts @@ -20,12 +20,14 @@ describe("createRepository", () => { const result = createRepository({ bucketName: "bucket-1", + environment: "dev", region: "eu-west-2", }); expect(mockS3Repository).toHaveBeenCalledWith( "bucket-1", expect.any(S3Client), + "dev/", ); expect(mockRepository).toHaveBeenCalledWith( mockS3Repository.mock.instances[0], diff --git a/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/applications-map-add.test.ts b/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/applications-map-add.test.ts index 99b08ca9..c44eef76 100644 --- a/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/applications-map-add.test.ts +++ b/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/applications-map-add.test.ts @@ -12,7 +12,7 @@ const mockFormatApplicationsMap = jest.fn(); jest.mock("src/entrypoint/cli/helper", () => ({ ...jest.requireActual("src/entrypoint/cli/helper"), - createSsmApplicationsMapRepository: jest.fn(), + createS3ApplicationsMapRepository: jest.fn(), })); jest.mock("src/format", () => ({ @@ -21,8 +21,8 @@ jest.mock("src/format", () => ({ mockFormatApplicationsMap(...args), })); -const mockCreateSsmApplicationsMapRepository = - helper.createSsmApplicationsMapRepository as jest.Mock; +const mockCreateS3ApplicationsMapRepository = + helper.createS3ApplicationsMapRepository as jest.Mock; describe("applications-map-add CLI", () => { const originalCliConsoleState = captureCliConsoleState(); @@ -34,8 +34,10 @@ describe("applications-map-add CLI", () => { "client-1", "--application-id", "app-1", - "--parameter-name", - "/nhs/dev/callbacks/applications-map", + "--applications-map-bucket", + "test-bucket", + "--applications-map-key", + 
"dev/applications-map.json", ]; const resultMap = new Map([["client-1", "app-1"]]); @@ -45,8 +47,8 @@ describe("applications-map-add CLI", () => { mockAddApplication.mockResolvedValue(resultMap); mockFormatApplicationsMap.mockReset(); mockFormatApplicationsMap.mockReturnValue("masked-map-output"); - mockCreateSsmApplicationsMapRepository.mockReset(); - mockCreateSsmApplicationsMapRepository.mockReturnValue({ + mockCreateS3ApplicationsMapRepository.mockReset(); + mockCreateS3ApplicationsMapRepository.mockResolvedValue({ addApplication: mockAddApplication, }); resetCliConsoleState(); @@ -59,11 +61,12 @@ describe("applications-map-add CLI", () => { it("adds application and logs output", async () => { await cli.main(baseArgs); - expect(mockCreateSsmApplicationsMapRepository).toHaveBeenCalledWith( + expect(mockCreateS3ApplicationsMapRepository).toHaveBeenCalledWith( expect.objectContaining({ "client-id": "client-1", "application-id": "app-1", - "parameter-name": "/nhs/dev/callbacks/applications-map", + "applications-map-bucket": "test-bucket", + "applications-map-key": "dev/applications-map.json", }), ); expect(mockAddApplication).toHaveBeenCalledWith("client-1", "app-1", false); @@ -85,7 +88,7 @@ describe("applications-map-add CLI", () => { await cli.main(baseArgs); expect(console.log).not.toHaveBeenCalledWith( - "Dry run — no changes written to SSM.", + "Dry run — no changes written to S3.", ); }); @@ -94,13 +97,13 @@ describe("applications-map-add CLI", () => { expect(mockAddApplication).toHaveBeenCalledWith("client-1", "app-1", true); expect(console.log).toHaveBeenCalledWith( - "Dry run — no changes written to SSM.", + "Dry run — no changes written to S3.", ); }); it("handles errors in wrapped CLI", async () => { expect.hasAssertions(); - mockCreateSsmApplicationsMapRepository.mockReturnValue({ + mockCreateS3ApplicationsMapRepository.mockResolvedValue({ addApplication: jest.fn().mockRejectedValue(new Error("Boom")), }); diff --git 
a/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/applications-map-get.test.ts b/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/applications-map-get.test.ts index 3ddb8041..c1f62cb3 100644 --- a/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/applications-map-get.test.ts +++ b/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/applications-map-get.test.ts @@ -11,11 +11,11 @@ const mockGetApplication = jest.fn(); jest.mock("src/entrypoint/cli/helper", () => ({ ...jest.requireActual("src/entrypoint/cli/helper"), - createSsmApplicationsMapRepository: jest.fn(), + createS3ApplicationsMapRepository: jest.fn(), })); -const mockCreateSsmApplicationsMapRepository = - helper.createSsmApplicationsMapRepository as jest.Mock; +const mockCreateS3ApplicationsMapRepository = + helper.createS3ApplicationsMapRepository as jest.Mock; describe("applications-map-get CLI", () => { const originalCliConsoleState = captureCliConsoleState(); @@ -25,14 +25,16 @@ describe("applications-map-get CLI", () => { "script", "--client-id", "client-1", - "--parameter-name", - "/nhs/dev/callbacks/applications-map", + "--applications-map-bucket", + "test-bucket", + "--applications-map-key", + "dev/applications-map.json", ]; beforeEach(() => { mockGetApplication.mockReset(); - mockCreateSsmApplicationsMapRepository.mockReset(); - mockCreateSsmApplicationsMapRepository.mockReturnValue({ + mockCreateS3ApplicationsMapRepository.mockReset(); + mockCreateS3ApplicationsMapRepository.mockResolvedValue({ getApplication: mockGetApplication, }); resetCliConsoleState(); @@ -47,10 +49,11 @@ describe("applications-map-get CLI", () => { await cli.main(baseArgs); - expect(mockCreateSsmApplicationsMapRepository).toHaveBeenCalledWith( + expect(mockCreateS3ApplicationsMapRepository).toHaveBeenCalledWith( expect.objectContaining({ "client-id": "client-1", - "parameter-name": "/nhs/dev/callbacks/applications-map", + "applications-map-bucket": 
"test-bucket", + "applications-map-key": "dev/applications-map.json", }), ); expect(mockGetApplication).toHaveBeenCalledWith("client-1"); diff --git a/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/helper.test.ts b/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/helper.test.ts index fc33aff7..0f60ad34 100644 --- a/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/helper.test.ts +++ b/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/helper.test.ts @@ -1,13 +1,11 @@ const mockCreateRepositoryFromOptions = jest.fn(); const mockResolveBucketName = jest.fn(); -const mockResolveProfile = jest.fn(); -const mockResolveRegion = jest.fn(); +const mockResolveDeploymentContext = jest.fn(); jest.mock("src/aws", () => ({ createRepository: mockCreateRepositoryFromOptions, resolveBucketName: mockResolveBucketName, - resolveProfile: mockResolveProfile, - resolveRegion: mockResolveRegion, + resolveDeploymentContext: mockResolveDeploymentContext, })); import { @@ -17,10 +15,14 @@ import { } from "src/entrypoint/cli/helper"; describe("createRepository", () => { - it("resolves region, profile and bucket then delegates to createRepository from aws", async () => { + it("resolves deployment context and bucket then delegates to createRepository from aws", async () => { const fakeRepo = { listClientIds: jest.fn() }; - mockResolveRegion.mockReturnValue("eu-west-2"); - mockResolveProfile.mockReturnValue("my-profile"); + mockResolveDeploymentContext.mockResolvedValue({ + environment: "my-env", + region: "eu-west-2", + accountId: "123456789012", + profile: "my-profile", + }); mockResolveBucketName.mockResolvedValue("my-bucket"); mockCreateRepositoryFromOptions.mockReturnValue(fakeRepo); @@ -31,8 +33,11 @@ describe("createRepository", () => { environment: "my-env", }); - expect(mockResolveRegion).toHaveBeenCalledWith("eu-west-2"); - expect(mockResolveProfile).toHaveBeenCalledWith("my-profile"); + 
expect(mockResolveDeploymentContext).toHaveBeenCalledWith({ + environment: "my-env", + region: "eu-west-2", + profile: "my-profile", + }); expect(mockResolveBucketName).toHaveBeenCalledWith({ bucketName: "my-bucket", environment: "my-env", @@ -41,11 +46,25 @@ describe("createRepository", () => { }); expect(mockCreateRepositoryFromOptions).toHaveBeenCalledWith({ bucketName: "my-bucket", + environment: "my-env", region: "eu-west-2", profile: "my-profile", }); expect(result).toBe(fakeRepo); }); + + it("throws when environment is not provided", async () => { + mockResolveDeploymentContext.mockRejectedValue( + new Error("Environment is required"), + ); + + await expect( + createRepository({ + "bucket-name": "my-bucket", + region: "eu-west-2", + } as Parameters[0]), + ).rejects.toThrow("Environment is required"); + }); }); describe("runCommands", () => { diff --git a/tools/client-subscriptions-management/src/__tests__/repository/ssm-applications-map.test.ts b/tools/client-subscriptions-management/src/__tests__/repository/s3-applications-map.test.ts similarity index 67% rename from tools/client-subscriptions-management/src/__tests__/repository/ssm-applications-map.test.ts rename to tools/client-subscriptions-management/src/__tests__/repository/s3-applications-map.test.ts index afb94e41..9e9084f4 100644 --- a/tools/client-subscriptions-management/src/__tests__/repository/ssm-applications-map.test.ts +++ b/tools/client-subscriptions-management/src/__tests__/repository/s3-applications-map.test.ts @@ -1,38 +1,48 @@ import { - GetParameterCommand, - PutParameterCommand, - type SSMClient, -} from "@aws-sdk/client-ssm"; -import SsmApplicationsMapRepository from "src/repository/ssm-applications-map"; + GetObjectCommand, + PutObjectCommand, + type S3Client, +} from "@aws-sdk/client-s3"; +import type { SdkStream } from "@smithy/types"; +import S3ApplicationsMapRepository from "src/repository/s3-applications-map"; + +const mockBody = (content: string) => + ({ + transformToString: 
jest.fn().mockResolvedValue(content), + }) as unknown as SdkStream; const createRepository = (send: jest.Mock = jest.fn()) => { - const client = { send } as unknown as SSMClient; + const client = { send } as unknown as S3Client; return { - repository: new SsmApplicationsMapRepository(client, "/test/param"), + repository: new S3ApplicationsMapRepository( + client, + "test-bucket", + "test/applications-map.json", + ), send, }; }; -describe("SsmApplicationsMapRepository", () => { +describe("S3ApplicationsMapRepository", () => { describe("getApplication", () => { it("returns the application ID for an existing client", async () => { const { repository, send } = createRepository(); send.mockResolvedValueOnce({ - Parameter: { - Value: JSON.stringify({ "client-1": "app-1", "client-2": "app-2" }), - }, + Body: mockBody( + JSON.stringify({ "client-1": "app-1", "client-2": "app-2" }), + ), }); const result = await repository.getApplication("client-1"); - expect(send).toHaveBeenCalledWith(expect.any(GetParameterCommand)); + expect(send).toHaveBeenCalledWith(expect.any(GetObjectCommand)); expect(result).toBe("app-1"); }); it("returns undefined when the client is not in the map", async () => { const { repository, send } = createRepository(); send.mockResolvedValueOnce({ - Parameter: { Value: JSON.stringify({ "other-client": "app-1" }) }, + Body: mockBody(JSON.stringify({ "other-client": "app-1" })), }); const result = await repository.getApplication("client-1"); @@ -40,10 +50,10 @@ describe("SsmApplicationsMapRepository", () => { expect(result).toBeUndefined(); }); - it("returns undefined when parameter does not exist", async () => { + it("returns undefined when object does not exist", async () => { const { repository, send } = createRepository(); const error = Object.assign(new Error("not found"), { - name: "ParameterNotFound", + name: "NoSuchKey", }); send.mockRejectedValueOnce(error); @@ -52,16 +62,16 @@ describe("SsmApplicationsMapRepository", () => { 
expect(result).toBeUndefined(); }); - it("returns undefined when parameter has no value", async () => { + it("returns undefined when object body is empty", async () => { const { repository, send } = createRepository(); - send.mockResolvedValueOnce({ Parameter: {} }); + send.mockResolvedValueOnce({ Body: undefined }); const result = await repository.getApplication("client-1"); expect(result).toBeUndefined(); }); - it("rethrows unexpected SSM errors", async () => { + it("rethrows unexpected S3 errors", async () => { const { repository, send } = createRepository(); send.mockRejectedValueOnce( Object.assign(new Error("Network failure"), { name: "NetworkError" }), @@ -78,16 +88,14 @@ describe("SsmApplicationsMapRepository", () => { const { repository, send } = createRepository(); send .mockResolvedValueOnce({ - Parameter: { - Value: JSON.stringify({ "existing-client": "existing-app" }), - }, + Body: mockBody(JSON.stringify({ "existing-client": "existing-app" })), }) .mockResolvedValueOnce({}); const result = await repository.addApplication("client-1", "app-1"); - expect(send).toHaveBeenNthCalledWith(1, expect.any(GetParameterCommand)); - expect(send).toHaveBeenNthCalledWith(2, expect.any(PutParameterCommand)); + expect(send).toHaveBeenNthCalledWith(1, expect.any(GetObjectCommand)); + expect(send).toHaveBeenNthCalledWith(2, expect.any(PutObjectCommand)); expect(result).toEqual( new Map([ ["existing-client", "existing-app"], @@ -96,10 +104,10 @@ describe("SsmApplicationsMapRepository", () => { ); }); - it("starts from empty map when parameter does not exist", async () => { + it("starts from empty map when object does not exist", async () => { const { repository, send } = createRepository(); const error = Object.assign(new Error("not found"), { - name: "ParameterNotFound", + name: "NoSuchKey", }); send.mockRejectedValueOnce(error).mockResolvedValueOnce({}); @@ -109,9 +117,9 @@ describe("SsmApplicationsMapRepository", () => { expect(send).toHaveBeenCalledTimes(2); }); - 
it("starts from empty map when parameter has no value", async () => { + it("starts from empty map when object body is empty", async () => { const { repository, send } = createRepository(); - send.mockResolvedValueOnce({ Parameter: {} }).mockResolvedValueOnce({}); + send.mockResolvedValueOnce({ Body: undefined }).mockResolvedValueOnce({}); const result = await repository.addApplication("client-1", "app-1"); @@ -122,7 +130,7 @@ describe("SsmApplicationsMapRepository", () => { const { repository, send } = createRepository(); send .mockResolvedValueOnce({ - Parameter: { Value: JSON.stringify({ "client-1": "old-app" }) }, + Body: mockBody(JSON.stringify({ "client-1": "old-app" })), }) .mockResolvedValueOnce({}); @@ -134,7 +142,7 @@ describe("SsmApplicationsMapRepository", () => { it("skips the put when dry-run is true", async () => { const { repository, send } = createRepository(); send.mockResolvedValueOnce({ - Parameter: { Value: JSON.stringify({}) }, + Body: mockBody(JSON.stringify({})), }); const result = await repository.addApplication("client-1", "app-1", true); @@ -143,7 +151,7 @@ describe("SsmApplicationsMapRepository", () => { expect(result).toEqual(new Map([["client-1", "app-1"]])); }); - it("rethrows unexpected SSM errors", async () => { + it("rethrows unexpected S3 errors", async () => { const { repository, send } = createRepository(); send.mockRejectedValueOnce( Object.assign(new Error("Network failure"), { name: "NetworkError" }), diff --git a/tools/client-subscriptions-management/src/__tests__/repository/s3.test.ts b/tools/client-subscriptions-management/src/__tests__/repository/s3.test.ts index 30a2ad43..ebd6c252 100644 --- a/tools/client-subscriptions-management/src/__tests__/repository/s3.test.ts +++ b/tools/client-subscriptions-management/src/__tests__/repository/s3.test.ts @@ -22,6 +22,60 @@ describe("S3Repository", () => { expect(send.mock.calls[0][0]).toBeInstanceOf(GetObjectCommand); }); + it("prepends keyPrefix to getObject key", async () => { + 
const send = jest.fn().mockResolvedValue({ + Body: { transformToString: jest.fn().mockResolvedValue("content") }, + }); + const repository = new S3Repository( + "bucket", + { send } as unknown as S3Client, + "dev/", + ); + + await repository.getObject("client_subscriptions/client-1.json"); + + const command = send.mock.calls[0][0] as GetObjectCommand; + expect(command.input.Key).toBe("dev/client_subscriptions/client-1.json"); + }); + + it("prepends keyPrefix to putRawData key", async () => { + const send = jest.fn().mockResolvedValue({}); + const repository = new S3Repository( + "bucket", + { send } as unknown as S3Client, + "abc/", + ); + + await repository.putRawData("payload", "client_subscriptions/c.json"); + + const command = send.mock.calls[0][0] as PutObjectCommand; + expect(command.input.Key).toBe("abc/client_subscriptions/c.json"); + }); + + it("prepends keyPrefix to listObjectKeys prefix and strips it from returned keys", async () => { + const send = jest.fn().mockResolvedValue({ + Contents: [ + { Key: "main/client_subscriptions/a.json" }, + { Key: "main/client_subscriptions/b.json" }, + ], + NextContinuationToken: undefined, + }); + const repository = new S3Repository( + "bucket", + { send } as unknown as S3Client, + "main/", + ); + + const keys = await repository.listObjectKeys("client_subscriptions/"); + + const command = send.mock.calls[0][0] as ListObjectsV2Command; + expect(command.input.Prefix).toBe("main/client_subscriptions/"); + expect(keys).toEqual([ + "client_subscriptions/a.json", + "client_subscriptions/b.json", + ]); + }); + it("throws when body is missing", async () => { const send = jest.fn().mockResolvedValue({}); const repository = new S3Repository("bucket", { diff --git a/tools/client-subscriptions-management/src/aws.ts b/tools/client-subscriptions-management/src/aws.ts index 5599b50b..57ae1c85 100644 --- a/tools/client-subscriptions-management/src/aws.ts +++ b/tools/client-subscriptions-management/src/aws.ts @@ -1,9 +1,8 @@ import { 
S3Client } from "@aws-sdk/client-s3"; -import { SSMClient } from "@aws-sdk/client-ssm"; import { GetCallerIdentityCommand, STSClient } from "@aws-sdk/client-sts"; import { fromIni } from "@aws-sdk/credential-providers"; import { ClientSubscriptionRepository } from "src/repository/client-subscriptions"; -import SsmApplicationsMapRepository from "src/repository/ssm-applications-map"; +import S3ApplicationsMapRepository from "src/repository/s3-applications-map"; import { S3Repository } from "src/repository/s3"; export const resolveProfile = ( @@ -28,43 +27,55 @@ export const deriveBucketName = ( accountId: string, environment: string, region: string, -): string => - `nhs-${accountId}-${region}-${environment}-callbacks-subscription-config`; +): string => `nhs-${accountId}-${region}-main-acct-clie-client-configs`; export const resolveRegion = ( regionArg?: string, env: NodeJS.ProcessEnv = process.env, ): string | undefined => regionArg ?? env.AWS_REGION ?? env.AWS_DEFAULT_REGION; +export interface DeploymentContext { + environment: string; + region: string; + accountId: string; + profile?: string; +} + +export const resolveDeploymentContext = async (args: { + environment?: string; + region?: string; + profile?: string; +}): Promise => { + const profile = resolveProfile(args.profile); + const region = resolveRegion(args.region) ?? "eu-west-2"; + const environment = args.environment ?? process.env.ENVIRONMENT; + + if (!environment) { + throw new Error( + "Environment is required. Please provide via --environment or ENVIRONMENT env var.", + ); + } + + const accountId = + process.env.AWS_ACCOUNT_ID ?? 
(await resolveAccountId(profile, region)); + + return { environment, region, accountId, profile }; +}; + export const resolveBucketName = async (args: { bucketName?: string; environment?: string; profile?: string; region?: string; }): Promise => { - const { bucketName, environment, profile, region } = args; - - if (bucketName) { - return bucketName; + if (args.bucketName) { + return args.bucketName; } - const resolvedEnvironment = environment ?? process.env.ENVIRONMENT; - const resolvedRegion = resolveRegion(region) ?? "eu-west-2"; - const resolvedAccountId = - process.env.AWS_ACCOUNT_ID ?? - (await resolveAccountId(profile, resolvedRegion)); - - if (!resolvedEnvironment) { - throw new Error( - "Environment is required to derive bucket name. Please provide via --environment or ENVIRONMENT env var.", - ); - } + const { accountId, environment, region } = + await resolveDeploymentContext(args); - return deriveBucketName( - resolvedAccountId, - resolvedEnvironment, - resolvedRegion, - ); + return deriveBucketName(accountId, environment, region); }; export const createS3Client = ( @@ -80,58 +91,57 @@ export const createS3Client = ( export const createRepository = (options: { bucketName: string; + environment: string; region?: string; profile?: string; }): ClientSubscriptionRepository => { const s3Repository = new S3Repository( options.bucketName, createS3Client(options.region, options.profile), + `${options.environment}/`, ); return new ClientSubscriptionRepository(s3Repository); }; -export const createSsmClient = ( - region?: string, - profile?: string, - env: NodeJS.ProcessEnv = process.env, -): SSMClient => { - const endpoint = env.AWS_ENDPOINT_URL; - const credentials = profile ? 
fromIni({ profile }) : undefined; - return new SSMClient({ region, endpoint, credentials }); -}; +export const deriveApplicationsMapBucketName = ( + accountId: string, + region: string, +): string => `nhs-${accountId}-${region}-main-acct-clie-apps-map`; -export const deriveParameterName = (environment: string): string => - `/nhs/${environment}/callbacks/applications-map`; +export const deriveApplicationsMapKey = (environment: string): string => + `${environment}/applications-map.json`; -export const resolveParameterName = (args: { - parameterName?: string; +export const resolveApplicationsMapLocation = async (args: { + bucketName?: string; + key?: string; environment?: string; - env?: NodeJS.ProcessEnv; -}): string => { - const { env = process.env, environment, parameterName } = args; - - if (parameterName) { - return parameterName; + profile?: string; + region?: string; +}): Promise<{ bucket: string; key: string }> => { + if (args.bucketName && args.key) { + return { bucket: args.bucketName, key: args.key }; } - const resolvedEnvironment = environment ?? env.ENVIRONMENT; - if (!resolvedEnvironment) { - throw new Error( - "Environment is required to derive parameter name. Please provide via --environment or ENVIRONMENT env var.", - ); - } + const { accountId, environment, region } = + await resolveDeploymentContext(args); - return deriveParameterName(resolvedEnvironment); + return { + bucket: + args.bucketName ?? deriveApplicationsMapBucketName(accountId, region), + key: args.key ?? 
deriveApplicationsMapKey(environment), + }; }; -export const createSsmApplicationsMapRepository = (options: { - parameterName: string; +export const createS3ApplicationsMapRepository = (options: { + bucket: string; + key: string; region?: string; profile?: string; -}): SsmApplicationsMapRepository => - new SsmApplicationsMapRepository( - createSsmClient(options.region, options.profile), - options.parameterName, +}): S3ApplicationsMapRepository => + new S3ApplicationsMapRepository( + createS3Client(options.region, options.profile), + options.bucket, + options.key, ); -export { default as SsmApplicationsMapRepository } from "src/repository/ssm-applications-map"; +export { default as S3ApplicationsMapRepository } from "src/repository/s3-applications-map"; diff --git a/tools/client-subscriptions-management/src/entrypoint/cli/applications-map-add.ts b/tools/client-subscriptions-management/src/entrypoint/cli/applications-map-add.ts index a98e574f..365d6e51 100644 --- a/tools/client-subscriptions-management/src/entrypoint/cli/applications-map-add.ts +++ b/tools/client-subscriptions-management/src/entrypoint/cli/applications-map-add.ts @@ -1,20 +1,20 @@ import type { Argv } from "yargs"; import { + type ApplicationsMapCliArgs, type CliCommand, type ClientCliArgs, - type SsmCliArgs, type WriteCliArgs, + applicationsMapOptions, clientIdOption, commonOptions, - createSsmApplicationsMapRepository, - parameterNameOption, + createS3ApplicationsMapRepository, runCommand, writeOptions, } from "src/entrypoint/cli/helper"; import { formatApplicationsMap } from "src/format"; type ApplicationsMapAddArgs = ClientCliArgs & - SsmCliArgs & + ApplicationsMapCliArgs & WriteCliArgs & { "application-id": string; }; @@ -23,7 +23,7 @@ export const builder = (yargs: Argv) => yargs.options({ ...commonOptions, ...clientIdOption, - ...parameterNameOption, + ...applicationsMapOptions, ...writeOptions, "application-id": { type: "string", @@ -35,7 +35,7 @@ export const builder = (yargs: Argv) => 
export const handler: CliCommand["handler"] = async ( argv, ) => { - const repository = createSsmApplicationsMapRepository(argv); + const repository = await createS3ApplicationsMapRepository(argv); const result = await repository.addApplication( argv["client-id"], argv["application-id"], @@ -43,14 +43,14 @@ export const handler: CliCommand["handler"] = async ( ); console.log(`Applications map updated for client '${argv["client-id"]}'.`); if (argv["dry-run"]) { - console.log("Dry run — no changes written to SSM."); + console.log("Dry run — no changes written to S3."); } console.log(formatApplicationsMap(result)); }; export const command: CliCommand = { command: "applications-map-add", - describe: "Add or update a client-to-application-ID mapping in SSM", + describe: "Add or update a client-to-application-ID mapping in S3", builder, handler, }; diff --git a/tools/client-subscriptions-management/src/entrypoint/cli/applications-map-get.ts b/tools/client-subscriptions-management/src/entrypoint/cli/applications-map-get.ts index 5ffe2192..3e22db39 100644 --- a/tools/client-subscriptions-management/src/entrypoint/cli/applications-map-get.ts +++ b/tools/client-subscriptions-management/src/entrypoint/cli/applications-map-get.ts @@ -1,28 +1,28 @@ import type { Argv } from "yargs"; import { + type ApplicationsMapCliArgs, type CliCommand, type ClientCliArgs, - type SsmCliArgs, + applicationsMapOptions, clientIdOption, commonOptions, - createSsmApplicationsMapRepository, - parameterNameOption, + createS3ApplicationsMapRepository, runCommand, } from "src/entrypoint/cli/helper"; -type ApplicationsMapGetArgs = ClientCliArgs & SsmCliArgs; +type ApplicationsMapGetArgs = ClientCliArgs & ApplicationsMapCliArgs; export const builder = (yargs: Argv) => yargs.options({ ...commonOptions, ...clientIdOption, - ...parameterNameOption, + ...applicationsMapOptions, }); export const handler: CliCommand["handler"] = async ( argv, ) => { - const repository = 
createSsmApplicationsMapRepository(argv); + const repository = await createS3ApplicationsMapRepository(argv); const applicationId = await repository.getApplication(argv["client-id"]); if (applicationId) { diff --git a/tools/client-subscriptions-management/src/entrypoint/cli/helper.ts b/tools/client-subscriptions-management/src/entrypoint/cli/helper.ts index 23070926..514626ca 100644 --- a/tools/client-subscriptions-management/src/entrypoint/cli/helper.ts +++ b/tools/client-subscriptions-management/src/entrypoint/cli/helper.ts @@ -4,11 +4,10 @@ import type { } from "@nhs-notify-client-callbacks/models"; import { createRepository as createRepositoryFromOptions, - createSsmApplicationsMapRepository as createSsmApplicationsMapRepositoryFromOptions, + createS3ApplicationsMapRepository as createS3ApplicationsMapRepositoryFromOptions, + resolveApplicationsMapLocation as resolveApplicationsMapLocationFromAws, resolveBucketName, - resolveParameterName as resolveParameterNameFromAws, - resolveProfile, - resolveRegion, + resolveDeploymentContext, } from "src/aws"; import { hideBin } from "yargs/helpers"; import yargs from "yargs/yargs"; @@ -41,15 +40,24 @@ export type WriteCliArgs = { }; export const createRepository = async (argv: CommonCliArgs) => { - const region = resolveRegion(argv.region); - const profile = resolveProfile(argv.profile); + const { environment, profile, region } = await resolveDeploymentContext({ + environment: argv.environment, + region: argv.region, + profile: argv.profile, + }); + const bucketName = await resolveBucketName({ bucketName: argv["bucket-name"], - environment: argv.environment, + environment, + region, + profile, + }); + return createRepositoryFromOptions({ + bucketName, + environment, region, profile, }); - return createRepositoryFromOptions({ bucketName, region, profile }); }; type BaseArgs = Record; @@ -145,28 +153,44 @@ export const writeOptions = { }, }; -export type SsmCliArgs = CommonCliArgs & { - "parameter-name"?: string; +export 
type ApplicationsMapCliArgs = CommonCliArgs & { + "applications-map-bucket"?: string; + "applications-map-key"?: string; }; -export const parameterNameOption = { - "parameter-name": { +export const applicationsMapOptions = { + "applications-map-bucket": { type: "string" as const, demandOption: false as const, description: - "Explicit SSM parameter name for the applications map (overrides derived name)", + "Explicit S3 bucket name for the applications map (overrides derived name)", + }, + "applications-map-key": { + type: "string" as const, + demandOption: false as const, + description: + "Explicit S3 key for the applications map (overrides derived key)", }, }; -export const createSsmApplicationsMapRepository = (argv: SsmCliArgs) => { - const region = resolveRegion(argv.region); - const profile = resolveProfile(argv.profile); - const parameterName = resolveParameterNameFromAws({ - parameterName: argv["parameter-name"], +export const createS3ApplicationsMapRepository = async ( + argv: ApplicationsMapCliArgs, +) => { + const { environment, profile, region } = await resolveDeploymentContext({ environment: argv.environment, + region: argv.region, + profile: argv.profile, + }); + const { bucket, key } = await resolveApplicationsMapLocationFromAws({ + bucketName: argv["applications-map-bucket"], + key: argv["applications-map-key"], + environment, + region, + profile, }); - return createSsmApplicationsMapRepositoryFromOptions({ - parameterName, + return createS3ApplicationsMapRepositoryFromOptions({ + bucket, + key, region, profile, }); diff --git a/tools/client-subscriptions-management/src/repository/s3-applications-map.ts b/tools/client-subscriptions-management/src/repository/s3-applications-map.ts new file mode 100644 index 00000000..f1465280 --- /dev/null +++ b/tools/client-subscriptions-management/src/repository/s3-applications-map.ts @@ -0,0 +1,69 @@ +import { + GetObjectCommand, + PutObjectCommand, + type S3Client, +} from "@aws-sdk/client-s3"; + +export default 
class S3ApplicationsMapRepository { + constructor( + private readonly client: S3Client, + private readonly bucket: string, + private readonly key: string, + ) {} + + async getApplication(clientId: string): Promise { + try { + const response = await this.client.send( + new GetObjectCommand({ Bucket: this.bucket, Key: this.key }), + ); + const body = await response.Body?.transformToString(); + if (body) { + const map = JSON.parse(body) as Record; + // eslint-disable-next-line security/detect-object-injection + return map[clientId]; + } + } catch (error) { + if (error instanceof Error && error.name !== "NoSuchKey") { + throw error; + } + } + return undefined; + } + + async addApplication( + clientId: string, + applicationId: string, + dryRun = false, + ): Promise> { + let current: Record = {}; + + try { + const response = await this.client.send( + new GetObjectCommand({ Bucket: this.bucket, Key: this.key }), + ); + const body = await response.Body?.transformToString(); + if (body) { + current = JSON.parse(body) as Record; + } + } catch (error) { + if (error instanceof Error && error.name !== "NoSuchKey") { + throw error; + } + } + + const updated = { ...current, [clientId]: applicationId }; + + if (!dryRun) { + await this.client.send( + new PutObjectCommand({ + Bucket: this.bucket, + Key: this.key, + Body: JSON.stringify(updated), + ContentType: "application/json", + }), + ); + } + + return new Map(Object.entries(updated)); + } +} diff --git a/tools/client-subscriptions-management/src/repository/s3.ts b/tools/client-subscriptions-management/src/repository/s3.ts index 75ffde9c..15eef66b 100644 --- a/tools/client-subscriptions-management/src/repository/s3.ts +++ b/tools/client-subscriptions-management/src/repository/s3.ts @@ -12,12 +12,17 @@ export class S3Repository { constructor( private readonly bucketName: string, private readonly s3Client: S3Client, + private readonly keyPrefix = "", ) {} + private prefixedKey(key: string): string { + return 
`${this.keyPrefix}${key}`; + } + async getObject(key: string): Promise { const params = { Bucket: this.bucketName, - Key: key, + Key: this.prefixedKey(key), }; try { const { Body } = await this.s3Client.send(new GetObjectCommand(params)); @@ -41,7 +46,7 @@ export class S3Repository { ): Promise { const params = { Bucket: this.bucketName, - Key: key, + Key: this.prefixedKey(key), Body: fileData, }; @@ -51,18 +56,19 @@ export class S3Repository { async listObjectKeys(prefix: string): Promise { const keys: string[] = []; let continuationToken: string | undefined; + const fullPrefix = this.prefixedKey(prefix); do { const { Contents, NextContinuationToken } = await this.s3Client.send( new ListObjectsV2Command({ Bucket: this.bucketName, - Prefix: prefix, + Prefix: fullPrefix, ContinuationToken: continuationToken, }), ); for (const obj of Contents ?? []) { if (obj.Key) { - keys.push(obj.Key); + keys.push(obj.Key.slice(this.keyPrefix.length)); } } continuationToken = NextContinuationToken; diff --git a/tools/client-subscriptions-management/src/repository/ssm-applications-map.ts b/tools/client-subscriptions-management/src/repository/ssm-applications-map.ts deleted file mode 100644 index a7edb3f6..00000000 --- a/tools/client-subscriptions-management/src/repository/ssm-applications-map.ts +++ /dev/null @@ -1,78 +0,0 @@ -import { - GetParameterCommand, - PutParameterCommand, - type SSMClient, -} from "@aws-sdk/client-ssm"; - -export default class SsmApplicationsMapRepository { - constructor( - private readonly client: SSMClient, - private readonly parameterName: string, - ) {} - - async getApplication(clientId: string): Promise { - try { - const response = await this.client.send( - new GetParameterCommand({ - Name: this.parameterName, - WithDecryption: true, - }), - ); - if (response.Parameter?.Value) { - const map = JSON.parse(response.Parameter.Value) as Record< - string, - string - >; - // eslint-disable-next-line security/detect-object-injection - return map[clientId]; - } 
- } catch (error) { - if (error instanceof Error && error.name !== "ParameterNotFound") { - throw error; - } - } - return undefined; - } - - async addApplication( - clientId: string, - applicationId: string, - dryRun = false, - ): Promise> { - let current: Record = {}; - - try { - const response = await this.client.send( - new GetParameterCommand({ - Name: this.parameterName, - WithDecryption: true, - }), - ); - if (response.Parameter?.Value) { - current = JSON.parse(response.Parameter.Value) as Record< - string, - string - >; - } - } catch (error) { - if (error instanceof Error && error.name !== "ParameterNotFound") { - throw error; - } - } - - const updated = { ...current, [clientId]: applicationId }; - - if (!dryRun) { - await this.client.send( - new PutParameterCommand({ - Name: this.parameterName, - Value: JSON.stringify(updated), - Type: "SecureString", - Overwrite: true, - }), - ); - } - - return new Map(Object.entries(updated)); - } -} From 75f1c81446abdf3ba4aa036b8cd9046f327c79b3 Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Wed, 6 May 2026 11:51:50 +0100 Subject: [PATCH 43/65] Set appropriate resolutions for all metrics --- .../src/__tests__/services/metrics.test.ts | 12 ++++++------ .../src/services/metrics.ts | 12 ++++++------ .../src/__tests__/delivery-metrics.test.ts | 8 ++++---- .../src/services/delivery-metrics.ts | 8 ++++---- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/lambdas/client-transform-filter-lambda/src/__tests__/services/metrics.test.ts b/lambdas/client-transform-filter-lambda/src/__tests__/services/metrics.test.ts index f600cae1..ed774769 100644 --- a/lambdas/client-transform-filter-lambda/src/__tests__/services/metrics.test.ts +++ b/lambdas/client-transform-filter-lambda/src/__tests__/services/metrics.test.ts @@ -101,7 +101,7 @@ describe("CallbackMetrics", () => { "TransformationsSuccessful", 1, Unit.Count, - StorageResolution.High, + StorageResolution.Standard, ); }); }); @@ -114,7 +114,7 @@ 
describe("CallbackMetrics", () => { "TransformationsFailed", 1, Unit.Count, - StorageResolution.High, + StorageResolution.Standard, ); }); }); @@ -127,7 +127,7 @@ describe("CallbackMetrics", () => { "CallbacksInitiated", 1, Unit.Count, - StorageResolution.High, + StorageResolution.Standard, ); }); }); @@ -140,7 +140,7 @@ describe("CallbackMetrics", () => { "ValidationErrors", 1, Unit.Count, - StorageResolution.High, + StorageResolution.Standard, ); }); }); @@ -153,7 +153,7 @@ describe("CallbackMetrics", () => { "FilteringStarted", 1, Unit.Count, - StorageResolution.High, + StorageResolution.Standard, ); }); }); @@ -166,7 +166,7 @@ describe("CallbackMetrics", () => { "FilteringMatched", 1, Unit.Count, - StorageResolution.High, + StorageResolution.Standard, ); }); }); diff --git a/lambdas/client-transform-filter-lambda/src/services/metrics.ts b/lambdas/client-transform-filter-lambda/src/services/metrics.ts index fd98ec02..9984ed58 100644 --- a/lambdas/client-transform-filter-lambda/src/services/metrics.ts +++ b/lambdas/client-transform-filter-lambda/src/services/metrics.ts @@ -38,7 +38,7 @@ export class CallbackMetrics { "TransformationsSuccessful", 1, Unit.Count, - StorageResolution.High, + StorageResolution.Standard, ); } @@ -47,7 +47,7 @@ export class CallbackMetrics { "TransformationsFailed", 1, Unit.Count, - StorageResolution.High, + StorageResolution.Standard, ); } @@ -56,7 +56,7 @@ export class CallbackMetrics { "CallbacksInitiated", 1, Unit.Count, - StorageResolution.High, + StorageResolution.Standard, ); } @@ -65,7 +65,7 @@ export class CallbackMetrics { "ValidationErrors", 1, Unit.Count, - StorageResolution.High, + StorageResolution.Standard, ); } @@ -74,7 +74,7 @@ export class CallbackMetrics { "FilteringStarted", 1, Unit.Count, - StorageResolution.High, + StorageResolution.Standard, ); } @@ -83,7 +83,7 @@ export class CallbackMetrics { "FilteringMatched", 1, Unit.Count, - StorageResolution.High, + StorageResolution.Standard, ); } } diff --git 
a/lambdas/https-client-lambda/src/__tests__/delivery-metrics.test.ts b/lambdas/https-client-lambda/src/__tests__/delivery-metrics.test.ts index 635a3666..10212c29 100644 --- a/lambdas/https-client-lambda/src/__tests__/delivery-metrics.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/delivery-metrics.test.ts @@ -140,7 +140,7 @@ describe("delivery-metrics", () => { "DeliveryPermanentFailure", 1, "Count", - 1, + 60, ); }); @@ -155,7 +155,7 @@ describe("delivery-metrics", () => { "CircuitBreakerOpen", 1, "Count", - 1, + 60, ); }); @@ -185,7 +185,7 @@ describe("delivery-metrics", () => { "CircuitBreakerClosed", 1, "Count", - 1, + 60, ); }); @@ -200,7 +200,7 @@ describe("delivery-metrics", () => { "DeliveryRetryWindowExhausted", 1, "Count", - 1, + 60, ); }); diff --git a/lambdas/https-client-lambda/src/services/delivery-metrics.ts b/lambdas/https-client-lambda/src/services/delivery-metrics.ts index ff32c751..7b38dbb3 100644 --- a/lambdas/https-client-lambda/src/services/delivery-metrics.ts +++ b/lambdas/https-client-lambda/src/services/delivery-metrics.ts @@ -61,7 +61,7 @@ export function emitDeliveryPermanentFailure(targetId: string): void { "DeliveryPermanentFailure", 1, Unit.Count, - StorageResolution.High, + StorageResolution.Standard, ); } @@ -83,7 +83,7 @@ export function emitCircuitBreakerOpen(targetId: string): void { "CircuitBreakerOpen", 1, Unit.Count, - StorageResolution.High, + StorageResolution.Standard, ); } @@ -94,7 +94,7 @@ export function emitCircuitBreakerClosed(targetId: string): void { "CircuitBreakerClosed", 1, Unit.Count, - StorageResolution.High, + StorageResolution.Standard, ); } @@ -105,7 +105,7 @@ export function emitRetryWindowExhausted(targetId: string): void { "DeliveryRetryWindowExhausted", 1, Unit.Count, - StorageResolution.High, + StorageResolution.Standard, ); } From 38595d158528effa9517c1f88c1254043ec1a98d Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Wed, 6 May 2026 11:52:46 +0100 Subject: [PATCH 44/65] Fix tf example comment 
--- infrastructure/terraform/components/callbacks/locals.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/terraform/components/callbacks/locals.tf b/infrastructure/terraform/components/callbacks/locals.tf index 20c735fc..0d84a61b 100644 --- a/infrastructure/terraform/components/callbacks/locals.tf +++ b/infrastructure/terraform/components/callbacks/locals.tf @@ -4,7 +4,7 @@ locals { client_csi = "${var.project}-${var.environment}-cbc" aws_lambda_functions_dir_path = "../../../../lambdas" log_destination_arn = "arn:aws:firehose:${var.region}:${var.aws_account_id}:deliverystream/nhs-main-obs-splunk-logs-firehose" - root_domain_name = "${var.environment}.${local.acct.route53_zone_names[local.bc_name]}" # e.g. [main|dev|abxy0].smsnudge.[dev|nonprod|prod].nhsnotify.national.nhs.uk + root_domain_name = "${var.environment}.${local.acct.route53_zone_names[local.bc_name]}" # e.g. [main|dev|abxy0].client-callbacks.[dev|nonprod|prod].nhsnotify.national.nhs.uk root_domain_id = local.acct.route53_zone_ids[local.bc_name] clients_dir_path = "${path.module}/../../modules/clients" From 425ce1c3417aba90dfd67bb476eb4b0683604a0e Mon Sep 17 00:00:00 2001 From: Tim Marston Date: Wed, 6 May 2026 12:03:07 +0100 Subject: [PATCH 45/65] linting fix --- .../src/services/transformers/message-status-transformer.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/lambdas/client-transform-filter-lambda/src/services/transformers/message-status-transformer.ts b/lambdas/client-transform-filter-lambda/src/services/transformers/message-status-transformer.ts index 049cfb02..d4b493db 100644 --- a/lambdas/client-transform-filter-lambda/src/services/transformers/message-status-transformer.ts +++ b/lambdas/client-transform-filter-lambda/src/services/transformers/message-status-transformer.ts @@ -41,7 +41,9 @@ export function transformMessageStatus( attributes.messageFailureReasonCode = data.messageFailureReasonCode; } - const { timestamp: _, 
...idempotencyBody } = attributes; + const idempotencyBody = Object.fromEntries( + Object.entries(attributes).filter(([key]) => key !== "timestamp"), + ); const idempotencyKey = createHash("sha256") .update(JSON.stringify(idempotencyBody)) .digest("hex"); From fa98f6e963135b1ce05cf2569c0db0c4672ed0b9 Mon Sep 17 00:00:00 2001 From: Rhys Cox Date: Wed, 6 May 2026 13:23:48 +0100 Subject: [PATCH 46/65] CCM-16073 - Fixed build --- docs/Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/Makefile b/docs/Makefile index a7854d6a..cafd8ae7 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -8,7 +8,7 @@ h help: @egrep '^\S|^$$' Makefile install: - pnpm install + npm install bundle config set --local path vendor/bundle bundle install @@ -20,10 +20,10 @@ $(if $(BASE_URL),-- --baseurl $(BASE_URL),-- --baseurl "") endef build: version - pnpm run build $(baseurlparam) + npm run build $(baseurlparam) debug: version - pnpm run debug + npm run debug version: touch _config.version.yml From 76384e1caee6a9fa7faff120757fedde62de9143 Mon Sep 17 00:00:00 2001 From: Rhys Cox Date: Wed, 6 May 2026 15:39:40 +0100 Subject: [PATCH 47/65] CCM-16073 - PR feedback --- .../modules/client-delivery/README.md | 1 + ...get.tf => iam_role_https_client_lambda.tf} | 0 .../module_https_client_lambda.tf | 4 +++ .../modules/client-delivery/variables.tf | 6 ++++ .../src/__tests__/admit-lua.test.ts | 23 ++++++++++++ .../src/__tests__/config-loader.test.ts | 12 +++++++ .../src/__tests__/endpoint-gate.test.ts | 8 +++++ .../src/__tests__/handler.test.ts | 20 ++++++++++- .../src/__tests__/https-client.test.ts | 36 ------------------- .../src/__tests__/retry-policy.test.ts | 4 +-- .../src/__tests__/tls-agent-factory.test.ts | 18 ++++++++++ lambdas/https-client-lambda/src/handler.ts | 5 +++ .../src/services/client-rate-limited-error.ts | 1 + .../src/services/delivery/retry-policy.ts | 4 +-- 14 files changed, 101 insertions(+), 41 deletions(-) rename 
infrastructure/terraform/modules/client-delivery/{iam_role_sqs_target.tf => iam_role_https_client_lambda.tf} (100%) create mode 100644 lambdas/https-client-lambda/src/services/client-rate-limited-error.ts diff --git a/infrastructure/terraform/modules/client-delivery/README.md b/infrastructure/terraform/modules/client-delivery/README.md index f8c742a2..6acbeb80 100644 --- a/infrastructure/terraform/modules/client-delivery/README.md +++ b/infrastructure/terraform/modules/client-delivery/README.md @@ -23,6 +23,7 @@ No requirements. | [delivery\_lambda\_batch\_size](#input\_delivery\_lambda\_batch\_size) | Number of SQS messages per Lambda invocation | `number` | `100` | no | | [delivery\_lambda\_batching\_window\_sec](#input\_delivery\_lambda\_batching\_window\_sec) | Maximum time in seconds to wait for a full batch before invoking Lambda. Allows the delivery queue to fill to batch\_size, improving Lambda concurrency utilisation. | `number` | `1` | no | | [delivery\_lambda\_code\_base\_path](#input\_delivery\_lambda\_code\_base\_path) | Base path to Lambda source code directories | `string` | n/a | yes | +| [delivery\_lambda\_max\_concurrency](#input\_delivery\_lambda\_max\_concurrency) | Maximum concurrent Lambda invocations for the SQS event source mapping | `number` | `200` | no | | [delivery\_lambda\_memory](#input\_delivery\_lambda\_memory) | Lambda memory allocation in MB | `number` | `256` | no | | [delivery\_lambda\_s3\_bucket](#input\_delivery\_lambda\_s3\_bucket) | S3 bucket for Lambda function artefacts | `string` | n/a | yes | | [delivery\_lambda\_security\_group\_id](#input\_delivery\_lambda\_security\_group\_id) | Security group ID for the Lambda function | `string` | `""` | no | diff --git a/infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf b/infrastructure/terraform/modules/client-delivery/iam_role_https_client_lambda.tf similarity index 100% rename from infrastructure/terraform/modules/client-delivery/iam_role_sqs_target.tf 
rename to infrastructure/terraform/modules/client-delivery/iam_role_https_client_lambda.tf diff --git a/infrastructure/terraform/modules/client-delivery/module_https_client_lambda.tf b/infrastructure/terraform/modules/client-delivery/module_https_client_lambda.tf index 3abd532c..abf75968 100644 --- a/infrastructure/terraform/modules/client-delivery/module_https_client_lambda.tf +++ b/infrastructure/terraform/modules/client-delivery/module_https_client_lambda.tf @@ -71,5 +71,9 @@ resource "aws_lambda_event_source_mapping" "sqs_delivery" { maximum_batching_window_in_seconds = var.delivery_lambda_batching_window_sec enabled = true + scaling_config { + maximum_concurrency = var.delivery_lambda_max_concurrency + } + function_response_types = ["ReportBatchItemFailures"] } diff --git a/infrastructure/terraform/modules/client-delivery/variables.tf b/infrastructure/terraform/modules/client-delivery/variables.tf index fde3edbb..6729cbc1 100644 --- a/infrastructure/terraform/modules/client-delivery/variables.tf +++ b/infrastructure/terraform/modules/client-delivery/variables.tf @@ -138,6 +138,12 @@ variable "delivery_lambda_batching_window_sec" { default = 1 } +variable "delivery_lambda_max_concurrency" { + type = number + description = "Maximum concurrent Lambda invocations for the SQS event source mapping" + default = 200 +} + variable "delivery_lambda_memory" { type = number description = "Lambda memory allocation in MB" diff --git a/lambdas/https-client-lambda/src/__tests__/admit-lua.test.ts b/lambdas/https-client-lambda/src/__tests__/admit-lua.test.ts index 2d6b2d8e..18198e2a 100644 --- a/lambdas/https-client-lambda/src/__tests__/admit-lua.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/admit-lua.test.ts @@ -544,6 +544,29 @@ describe("admit.lua", () => { expect(effectiveRate).toBe(10); }); + it("returns rate_limited on fresh endpoint unlike cbEnabled=true which grants a probe token", () => { + const store = createRedisStore(); + const now = 1_000_000; + + const 
withCbDisabled = runAdmit(store, { + now, + targetRateLimit: 10, + cbEnabled: false, + }); + + const freshStore = createRedisStore(); + const withCbEnabled = runAdmit(freshStore, { + now, + targetRateLimit: 10, + cbEnabled: true, + }); + + expect(withCbDisabled.consumedTokens).toBe(0); + expect(withCbDisabled.reason).toBe("rate_limited"); + expect(withCbEnabled.consumedTokens).toBe(1); + expect(withCbEnabled.reason).toBe("some_allowed"); + }); + it("applies initial values on fresh endpoint so first call has no tokens", () => { const store = createRedisStore(); const now = 1_000_000; diff --git a/lambdas/https-client-lambda/src/__tests__/config-loader.test.ts b/lambdas/https-client-lambda/src/__tests__/config-loader.test.ts index 5c133ca6..fc880afa 100644 --- a/lambdas/https-client-lambda/src/__tests__/config-loader.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/config-loader.test.ts @@ -198,4 +198,16 @@ describe("loadTargetConfig", () => { spy.mockRestore(); }); + + it("propagates S3 NoSuchKey error from loadClientConfig", async () => { + const noSuchKeyError = Object.assign(new Error("NoSuchKey"), { + name: "NoSuchKey", + }); + mockS3Send.mockRejectedValue(noSuchKeyError); + resetCache(); + + await expect( + loadTargetConfig("unknown-client", "target-1"), + ).rejects.toThrow("NoSuchKey"); + }); }); diff --git a/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts b/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts index 4e697132..901307dc 100644 --- a/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/endpoint-gate.test.ts @@ -277,6 +277,14 @@ describe("recordResult", () => { expect(result).toEqual({ circuitState: "closed", circuitSwitched: false }); expect(mockSendCommand).toHaveBeenCalledTimes(2); + expect(mockSendCommand).toHaveBeenNthCalledWith( + 1, + expect.arrayContaining(["EVALSHA"]), + ); + expect(mockSendCommand).toHaveBeenNthCalledWith( + 2, + 
expect.arrayContaining(["EVAL"]), + ); }); it("passes correct ep key for target", async () => { diff --git a/lambdas/https-client-lambda/src/__tests__/handler.test.ts b/lambdas/https-client-lambda/src/__tests__/handler.test.ts index 2d48d865..5ad30dfc 100644 --- a/lambdas/https-client-lambda/src/__tests__/handler.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/handler.test.ts @@ -3,6 +3,7 @@ import { DEFAULT_TARGET, makeRecord, } from "__tests__/fixtures/handler-fixtures"; +import { ClientRateLimitedError } from "services/client-rate-limited-error"; import { VisibilityManagedError } from "services/visibility-managed-error"; jest.mock("@nhs-notify-client-callbacks/logger", () => ({ @@ -109,7 +110,7 @@ describe("processRecords", () => { mockJitteredBackoff.mockReturnValue(5); mockIsWindowExhausted.mockReturnValue(false); mockHandleRateLimitedRecord.mockRejectedValue( - new VisibilityManagedError("Rate limited — requeue"), + new ClientRateLimitedError("Rate limited — requeue"), ); mockGetRedisClient.mockResolvedValue({}); mockAdmit.mockResolvedValue({ @@ -275,6 +276,23 @@ describe("processRecords", () => { expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 5); }); + it("does not override visibility on ClientRateLimitedError", async () => { + mockDeliverPayload.mockResolvedValue({ + outcome: "rate_limited", + retryAfterHeader: "120", + statusCode: 429, + }); + mockHandleRateLimitedRecord.mockImplementation(async () => { + await mockChangeVisibility("receipt-1", 120); + throw new ClientRateLimitedError("Rate limited — requeue"); + }); + + await processRecords([makeRecord()]); + + expect(mockChangeVisibility).toHaveBeenCalledTimes(1); + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 120); + }); + it("changes visibility once per record for admission-denied batch", async () => { mockAdmit.mockResolvedValue({ allowed: false, diff --git a/lambdas/https-client-lambda/src/__tests__/https-client.test.ts 
b/lambdas/https-client-lambda/src/__tests__/https-client.test.ts index a6229c57..50ddf14c 100644 --- a/lambdas/https-client-lambda/src/__tests__/https-client.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/https-client.test.ts @@ -271,42 +271,6 @@ describe("deliverPayload", () => { expect(result).toEqual({ outcome: "transient_failure", statusCode: 0 }); }); - it("treats undefined statusCode as transient failure with code 0", async () => { - const mockReq = new EventEmitter() as EventEmitter & { - end: jest.Mock; - destroy: jest.Mock; - }; - mockReq.end = jest.fn(); - mockReq.destroy = jest.fn(); - - jest.spyOn(https, "request").mockImplementation((...args: unknown[]) => { - const callback = args.find((a) => typeof a === "function") as - | ((res: MockResponse) => void) - | undefined; - - const res: MockResponse = Object.assign(new EventEmitter(), { - statusCode: undefined as unknown as number, - headers: {}, - resume: jest.fn(), - }); - - if (callback) { - process.nextTick(() => callback(res)); - } - - return mockReq as unknown as ReturnType; - }); - - const result = await deliverPayload( - createTarget(), - '{"test":true}', - "sig-abc", - createMockAgent(), - ); - - expect(result).toEqual({ outcome: "transient_failure", statusCode: 0 }); - }); - it("treats undefined statusCode as 0", async () => { const mockReq = new EventEmitter() as EventEmitter & { end: jest.Mock; diff --git a/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts b/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts index c53927fb..d20cc0b8 100644 --- a/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts @@ -6,7 +6,7 @@ import { jitteredBackoffSeconds, parseRetryAfter, } from "services/delivery/retry-policy"; -import { VisibilityManagedError } from "services/visibility-managed-error"; +import { ClientRateLimitedError } from "services/client-rate-limited-error"; const mockSendToDlq = 
jest.fn(); jest.mock("services/dlq-sender", () => ({ @@ -182,6 +182,6 @@ describe("handleRateLimitedRecord", () => { it("throws after requeuing so SQS marks the record as failed", async () => { await expect( handleRateLimitedRecord(makeRecord(), "client-1", "target-1", "30", 1), - ).rejects.toThrow(VisibilityManagedError); + ).rejects.toThrow(ClientRateLimitedError); }); }); diff --git a/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts b/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts index 95de2d6d..4790f4eb 100644 --- a/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts @@ -386,4 +386,22 @@ describe("tls-agent-factory", () => { expect(agent).toBeDefined(); }); + + it("reloads cert material when cached cert is approaching expiry", async () => { + // Use a threshold larger than the mock cert's remaining validity (365 days) + process.env.CERT_EXPIRY_THRESHOLD_MS = String(400 * 86_400_000); + jest.resetModules(); + // @ts-expect-error -- modulePaths resolves at runtime + const mod = await import("services/delivery/tls-agent-factory"); + + mockS3PemResponse(COMBINED_PEM); + const target = createTarget({ delivery: { mtls: { enabled: true } } }); + + await mod.buildAgent(target); + + mockS3PemResponse(COMBINED_PEM); + await mod.buildAgent(target); + + expect(mockS3Send).toHaveBeenCalledTimes(2); + }); }); diff --git a/lambdas/https-client-lambda/src/handler.ts b/lambdas/https-client-lambda/src/handler.ts index 9e02b372..6be74870 100644 --- a/lambdas/https-client-lambda/src/handler.ts +++ b/lambdas/https-client-lambda/src/handler.ts @@ -19,6 +19,7 @@ import { isWindowExhausted, jitteredBackoffSeconds, } from "services/delivery/retry-policy"; +import { ClientRateLimitedError } from "services/client-rate-limited-error"; import { VisibilityManagedError } from "services/visibility-managed-error"; import { type EndpointGateConfig, @@ -305,6 +306,10 
@@ async function processTargetBatch( err: error, }); + if (error instanceof ClientRateLimitedError) { + return { record, success: false, dlq: false }; + } + if (error instanceof VisibilityManagedError) { const receiveCount = Number( record.attributes.ApproximateReceiveCount, diff --git a/lambdas/https-client-lambda/src/services/client-rate-limited-error.ts b/lambdas/https-client-lambda/src/services/client-rate-limited-error.ts new file mode 100644 index 00000000..f1968e7b --- /dev/null +++ b/lambdas/https-client-lambda/src/services/client-rate-limited-error.ts @@ -0,0 +1 @@ +export class ClientRateLimitedError extends Error {} diff --git a/lambdas/https-client-lambda/src/services/delivery/retry-policy.ts b/lambdas/https-client-lambda/src/services/delivery/retry-policy.ts index 2bde6516..6baa692e 100644 --- a/lambdas/https-client-lambda/src/services/delivery/retry-policy.ts +++ b/lambdas/https-client-lambda/src/services/delivery/retry-policy.ts @@ -2,7 +2,7 @@ import type { SQSRecord } from "aws-lambda"; import { logger } from "@nhs-notify-client-callbacks/logger"; import { sendToDlq } from "services/dlq-sender"; import { changeVisibility } from "services/sqs-visibility"; -import { VisibilityManagedError } from "services/visibility-managed-error"; +import { ClientRateLimitedError } from "services/client-rate-limited-error"; const BACKOFF_CAP_SECONDS = 300; const SQS_MAX_VISIBILITY_SECONDS = 43_200; @@ -76,5 +76,5 @@ export async function handleRateLimitedRecord( delaySec, }); await changeVisibility(record.receiptHandle, delaySec); - throw new VisibilityManagedError("Rate limited — requeue"); + throw new ClientRateLimitedError("Rate limited — requeue"); } From f5246312abbe41c182be054541c8864d3389cd0d Mon Sep 17 00:00:00 2001 From: Rhys Cox Date: Wed, 6 May 2026 16:08:10 +0100 Subject: [PATCH 48/65] CCM-16073 - PR feedback --- .../src/__tests__/retry-policy.test.ts | 126 ++++++++++++------ 1 file changed, 87 insertions(+), 39 deletions(-) diff --git 
a/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts b/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts index d20cc0b8..12656385 100644 --- a/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts @@ -140,48 +140,96 @@ describe("handleRateLimitedRecord", () => { mockChangeVisibility.mockResolvedValue(undefined); }); - it("sends to DLQ and returns when Retry-After exceeds SQS max visibility", async () => { - await handleRateLimitedRecord( - makeRecord(), - "client-1", - "target-1", - "50000", - 1, - ); - - expect(mockSendToDlq).toHaveBeenCalledWith(makeRecord().body); - expect(mockChangeVisibility).not.toHaveBeenCalled(); - }); - - it("uses Retry-After value for changeVisibility when within SQS max", async () => { - await expect( - handleRateLimitedRecord(makeRecord(), "client-1", "target-1", "120", 1), - ).rejects.toThrow("Rate limited — requeue"); - - expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 120); - expect(mockSendToDlq).not.toHaveBeenCalled(); - }); - - it("uses jittered backoff when no Retry-After header provided", async () => { - await expect( - handleRateLimitedRecord( + describe("429 under retry period", () => { + it("retries with Retry-After delay when within SQS max visibility", async () => { + await expect( + handleRateLimitedRecord(makeRecord(), "client-1", "target-1", "120", 1), + ).rejects.toThrow(ClientRateLimitedError); + + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 120); + expect(mockSendToDlq).not.toHaveBeenCalled(); + }); + + it("retries with jittered backoff when no Retry-After header provided", async () => { + await expect( + handleRateLimitedRecord( + makeRecord(), + "client-1", + "target-1", + undefined, + 1, + ), + ).rejects.toThrow(ClientRateLimitedError); + + expect(mockChangeVisibility).toHaveBeenCalled(); + const [, delaySec] = mockChangeVisibility.mock.calls[0] as [ + string, + number, + ]; + 
expect(delaySec).toBeGreaterThanOrEqual(1); + expect(delaySec).toBeLessThan(5); + expect(mockSendToDlq).not.toHaveBeenCalled(); + }); + }); + + describe("429 over retry period but under SQS limit", () => { + it("retries when Retry-After exceeds max retry duration but is within SQS max visibility", async () => { + await expect( + handleRateLimitedRecord( + makeRecord(), + "client-1", + "target-1", + "10000", + 1, + ), + ).rejects.toThrow(ClientRateLimitedError); + + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 10_000); + expect(mockSendToDlq).not.toHaveBeenCalled(); + }); + + it("retries at SQS max visibility boundary (43200s)", async () => { + await expect( + handleRateLimitedRecord( + makeRecord(), + "client-1", + "target-1", + "43200", + 1, + ), + ).rejects.toThrow(ClientRateLimitedError); + + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 43_200); + expect(mockSendToDlq).not.toHaveBeenCalled(); + }); + }); + + describe("429 over SQS limit", () => { + it("sends to DLQ when Retry-After exceeds SQS max visibility", async () => { + await handleRateLimitedRecord( makeRecord(), "client-1", "target-1", - undefined, + "50000", 1, - ), - ).rejects.toThrow("Rate limited — requeue"); - - expect(mockChangeVisibility).toHaveBeenCalled(); - const [, delaySec] = mockChangeVisibility.mock.calls[0] as [string, number]; - expect(delaySec).toBeGreaterThanOrEqual(0); - expect(delaySec).toBeLessThan(5); - }); - - it("throws after requeuing so SQS marks the record as failed", async () => { - await expect( - handleRateLimitedRecord(makeRecord(), "client-1", "target-1", "30", 1), - ).rejects.toThrow(ClientRateLimitedError); + ); + + expect(mockSendToDlq).toHaveBeenCalledWith(makeRecord().body); + expect(mockChangeVisibility).not.toHaveBeenCalled(); + }); + + it("does not throw when sending to DLQ", async () => { + await expect( + handleRateLimitedRecord( + makeRecord(), + "client-1", + "target-1", + "43201", + 1, + ), + ).resolves.toBeUndefined(); + + 
expect(mockSendToDlq).toHaveBeenCalledWith(makeRecord().body); + }); }); }); From 6500c5d3b7dd25df12025693a2a53c85ad579c7a Mon Sep 17 00:00:00 2001 From: Rhys Cox Date: Thu, 7 May 2026 08:49:39 +0100 Subject: [PATCH 49/65] CCM-16073 - PR feedback --- .../src/__tests__/handler.test.ts | 39 ++++++++++++++++++- lambdas/https-client-lambda/src/handler.ts | 3 +- 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/lambdas/https-client-lambda/src/__tests__/handler.test.ts b/lambdas/https-client-lambda/src/__tests__/handler.test.ts index 5ad30dfc..bb7f1a4e 100644 --- a/lambdas/https-client-lambda/src/__tests__/handler.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/handler.test.ts @@ -463,6 +463,41 @@ describe("processRecords", () => { expect(mockDeliverPayload).toHaveBeenCalled(); }); + it("defaults cbEnabled to true when delivery exists but circuitBreaker is absent", async () => { + const targetDeliveryNoCb = { + ...DEFAULT_TARGET, + delivery: { mtls: { enabled: true } }, + }; + mockLoadTargetConfig.mockResolvedValue(targetDeliveryNoCb); + + await processRecords([makeRecord()]); + + expect(mockAdmit).toHaveBeenCalledWith( + expect.anything(), + "target-1", + 10, + true, + 1, + expect.objectContaining({ burstCapacity: 50 }), + ); + }); + + it("defaults cbEnabled to false when delivery is absent", async () => { + const targetNoDelivery = { ...DEFAULT_TARGET, delivery: undefined }; + mockLoadTargetConfig.mockResolvedValue(targetNoDelivery); + + await processRecords([makeRecord()]); + + expect(mockAdmit).toHaveBeenCalledWith( + expect.anything(), + "target-1", + 10, + false, + 1, + expect.objectContaining({ burstCapacity: 50 }), + ); + }); + it("computes burst capacity as invocationRateLimit * 5", async () => { const targetHighRate = { ...DEFAULT_TARGET, @@ -476,7 +511,7 @@ describe("processRecords", () => { expect.anything(), "target-1", 100, - false, + true, 1, expect.objectContaining({ burstCapacity: 500 }), ); @@ -495,7 +530,7 @@ 
describe("processRecords", () => { expect.anything(), "target-1", 1000, - false, + true, 1, expect.objectContaining({ burstCapacity: 2250 }), ); diff --git a/lambdas/https-client-lambda/src/handler.ts b/lambdas/https-client-lambda/src/handler.ts index 6be74870..956cfa79 100644 --- a/lambdas/https-client-lambda/src/handler.ts +++ b/lambdas/https-client-lambda/src/handler.ts @@ -241,7 +241,8 @@ async function processTargetBatch( concurrencyLimit: number, ): Promise { const target = await loadTargetConfig(clientId, batch.targetId); - const cbEnabled = target.delivery?.circuitBreaker?.enabled ?? false; + const cbEnabled = + target.delivery?.circuitBreaker?.enabled ?? Boolean(target.delivery); const targetBurstCapacity = Math.min( target.invocationRateLimit * BURST_MULTIPLIER, From 04c301ff08a2ce4d2380e25a1c1a4b279a77f9a4 Mon Sep 17 00:00:00 2001 From: Rhys Cox Date: Thu, 7 May 2026 09:19:55 +0100 Subject: [PATCH 50/65] CCM-16073 - PR feedback --- .../fixtures/subscriptions/mock-client-fan-out.json | 6 ++++++ .../fixtures/subscriptions/mock-client-mtls.json | 3 +++ .../fixtures/subscriptions/mock-client-rate-limit.json | 3 +++ .../fixtures/subscriptions/mock-client-short-retry.json | 3 +++ .../fixtures/subscriptions/mock-client-single-target.json | 3 +++ 5 files changed, 18 insertions(+) diff --git a/tests/integration/fixtures/subscriptions/mock-client-fan-out.json b/tests/integration/fixtures/subscriptions/mock-client-fan-out.json index 14985d4c..0a606537 100644 --- a/tests/integration/fixtures/subscriptions/mock-client-fan-out.json +++ b/tests/integration/fixtures/subscriptions/mock-client-fan-out.json @@ -21,6 +21,9 @@ "headerValue": "REPLACED_BY_TERRAFORM" }, "delivery": { + "circuitBreaker": { + "enabled": false + }, "mtls": { "certPinning": { "enabled": false @@ -40,6 +43,9 @@ "headerValue": "REPLACED_BY_TERRAFORM" }, "delivery": { + "circuitBreaker": { + "enabled": false + }, "mtls": { "certPinning": { "enabled": false diff --git 
a/tests/integration/fixtures/subscriptions/mock-client-mtls.json b/tests/integration/fixtures/subscriptions/mock-client-mtls.json index 0fce5d72..3e78197a 100644 --- a/tests/integration/fixtures/subscriptions/mock-client-mtls.json +++ b/tests/integration/fixtures/subscriptions/mock-client-mtls.json @@ -20,6 +20,9 @@ "headerValue": "REPLACED_BY_TERRAFORM" }, "delivery": { + "circuitBreaker": { + "enabled": false + }, "mtls": { "certPinning": { "enabled": true, diff --git a/tests/integration/fixtures/subscriptions/mock-client-rate-limit.json b/tests/integration/fixtures/subscriptions/mock-client-rate-limit.json index 1129c337..0c798742 100644 --- a/tests/integration/fixtures/subscriptions/mock-client-rate-limit.json +++ b/tests/integration/fixtures/subscriptions/mock-client-rate-limit.json @@ -20,6 +20,9 @@ "headerValue": "REPLACED_BY_TERRAFORM" }, "delivery": { + "circuitBreaker": { + "enabled": false + }, "mtls": { "certPinning": { "enabled": false diff --git a/tests/integration/fixtures/subscriptions/mock-client-short-retry.json b/tests/integration/fixtures/subscriptions/mock-client-short-retry.json index d6528102..2cc672d4 100644 --- a/tests/integration/fixtures/subscriptions/mock-client-short-retry.json +++ b/tests/integration/fixtures/subscriptions/mock-client-short-retry.json @@ -20,6 +20,9 @@ "headerValue": "REPLACED_BY_TERRAFORM" }, "delivery": { + "circuitBreaker": { + "enabled": false + }, "maxRetryDurationSeconds": 10, "mtls": { "certPinning": { diff --git a/tests/integration/fixtures/subscriptions/mock-client-single-target.json b/tests/integration/fixtures/subscriptions/mock-client-single-target.json index 41422f4b..ce8f9d11 100644 --- a/tests/integration/fixtures/subscriptions/mock-client-single-target.json +++ b/tests/integration/fixtures/subscriptions/mock-client-single-target.json @@ -36,6 +36,9 @@ "headerValue": "REPLACED_BY_TERRAFORM" }, "delivery": { + "circuitBreaker": { + "enabled": false + }, "mtls": { "certPinning": { "enabled": false From 
e08a5d2038b242af72e07a71a08b1f480479e39c Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Thu, 7 May 2026 09:35:23 +0100 Subject: [PATCH 51/65] Refactor/bolster rate limit unit tests --- .../src/__tests__/admit-lua.test.ts | 496 +++++++++++------- .../src/__tests__/record-result-lua.test.ts | 318 +++++++++-- 2 files changed, 593 insertions(+), 221 deletions(-) diff --git a/lambdas/https-client-lambda/src/__tests__/admit-lua.test.ts b/lambdas/https-client-lambda/src/__tests__/admit-lua.test.ts index 18198e2a..930198c6 100644 --- a/lambdas/https-client-lambda/src/__tests__/admit-lua.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/admit-lua.test.ts @@ -64,8 +64,8 @@ function runAdmit( } describe("admit.lua", () => { - describe("rate limiting", () => { - it("allows one initial probe token on a fresh endpoint with no prior state", () => { + describe("first contact (no prior state)", () => { + it("allows one initial probe token on a fresh endpoint", () => { const store = createRedisStore(); const now = 1_000_000; @@ -93,66 +93,9 @@ describe("admit.lua", () => { expect(consumedTokens).toBe(1); expect(reason).toBe("some_allowed"); }); + }); - it("does not persist circuit state on first contact", () => { - const store = createRedisStore(); - const now = 1_000_000; - - runAdmit(store, { now, targetRateLimit: 10 }); - - const epHash = store.get("ep:t1")!; - expect(epHash.has("is_open")).toBe(false); - expect(epHash.has("switched_at")).toBe(false); - }); - - it("allows full rate after record-result closes the circuit", () => { - const store = createRedisStore(); - const now = 1_000_000; - - store.set( - "ep:t1", - new Map([ - ["is_open", "0"], - ["switched_at", now.toString()], - ["bucket_tokens", "0"], - ["bucket_refilled_at", now.toString()], - ]), - ); - - const later = now + 60_000; - const { consumedTokens, reason } = runAdmit(store, { - now: later, - targetRateLimit: 10, - recoveryPeriodMs: 600_000, - }); - - expect(consumedTokens).toBeGreaterThanOrEqual(1); - 
expect(reason).toBe("some_allowed"); - }); - - it("allows a single request when bucket has tokens from refill", () => { - const store = createRedisStore(); - const now = 1_000_000; - store.set( - "ep:t1", - new Map([ - ["is_open", "0"], - ["bucket_tokens", "0"], - ["bucket_refilled_at", "0"], - ["switched_at", "0"], - ]), - ); - - const { consumedTokens, reason, retryAfterMs } = runAdmit(store, { - now, - targetRateLimit: 10, - }); - - expect(consumedTokens).toBe(1); - expect(reason).toBe("some_allowed"); - expect(retryAfterMs).toBe(0); - }); - + describe("token bucket", () => { it("consumes up to targetBatchSize tokens", () => { const store = createRedisStore(); const now = 1_000_000; @@ -240,13 +183,13 @@ describe("admit.lua", () => { new Map([ ["is_open", "0"], ["bucket_tokens", "0"], - ["bucket_refilled_at", "0"], + ["bucket_refilled_at", now.toString()], ["switched_at", "0"], ]), ); const { consumedTokens } = runAdmit(store, { - now, + now: now + 1000, capacity: 5, targetRateLimit: 100, targetBatchSize: 10, @@ -275,29 +218,122 @@ describe("admit.lua", () => { expect(reason).toBe("rate_limited"); }); - it("preserves fractional refill time (bucketRefilledAt += generationTime, not now)", () => { + it("advances bucket_refilled_at by token cost to preserve the sub-token remainder for the next call", () => { const store = createRedisStore(); + const ratePerSecond = 10; + const msPerToken = 1000 / ratePerSecond; // 100ms + const elapsedMs = 150; // enough elapsed for 1 token (100ms), with 50ms remainder const now = 1_000_000; store.set( "ep:t1", new Map([ ["is_open", "0"], ["bucket_tokens", "0"], - ["bucket_refilled_at", (now - 150).toString()], + ["bucket_refilled_at", (now - elapsedMs).toString()], ["switched_at", "0"], ]), ); - runAdmit(store, { now, targetRateLimit: 10 }); + runAdmit(store, { now, targetRateLimit: ratePerSecond }); const epHash = store.get("ep:t1")!; const refilledAt = Number(epHash.get("bucket_refilled_at")); - // 1 token generated at rate 10/s 
takes 100ms, so refilledAt = (now-150) + 100 = now - 50 - expect(refilledAt).toBe(now - 50); + expect(refilledAt).not.toBe(now); // must not advance to now (elapsed time) + expect(refilledAt).toBe(now - elapsedMs + msPerToken); // must advance by token cost only + }); + + it("does not update bucket_refilled_at when no tokens are generated", () => { + const store = createRedisStore(); + const now = 1_000_000; + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["bucket_tokens", "5"], + ["bucket_refilled_at", now.toString()], + ["switched_at", "0"], + ]), + ); + + runAdmit(store, { now, targetRateLimit: 10 }); + + const epHash = store.get("ep:t1")!; + expect(Number(epHash.get("bucket_refilled_at"))).toBe(now); + }); + + it("returns retryAfterMs=0 when some tokens are consumed", () => { + const store = createRedisStore(); + const now = 1_000_000; + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["bucket_tokens", "5"], + ["bucket_refilled_at", now.toString()], + ["switched_at", "0"], + ]), + ); + + const { retryAfterMs } = runAdmit(store, { now, targetBatchSize: 3 }); + + expect(retryAfterMs).toBe(0); + }); + + it("returns rate_limited when targetBatchSize is 0", () => { + const store = createRedisStore(); + const now = 1_000_000; + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["bucket_tokens", "5"], + ["bucket_refilled_at", now.toString()], + ["switched_at", "0"], + ]), + ); + + const { consumedTokens, reason, retryAfterMs } = runAdmit(store, { + now, + targetBatchSize: 0, + }); + + expect(consumedTokens).toBe(0); + expect(reason).toBe("rate_limited"); + expect(retryAfterMs).toBe(1000); + }); + + it("accumulates tokens accurately across multiple sequential calls", () => { + const store = createRedisStore(); + const ratePerSecond = 10; + const msPerToken = 1000 / ratePerSecond; // 100ms + const callCount = 10; + const start = 1_000_000; + + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["switched_at", "0"], + ["bucket_tokens", "0"], 
+ ["bucket_refilled_at", start.toString()], + ]), + ); + + let totalConsumed = 0; + for (let i = 1; i <= callCount; i++) { + const { consumedTokens } = runAdmit(store, { + now: start + i * msPerToken, + targetRateLimit: ratePerSecond, + targetBatchSize: 1, + }); + totalConsumed += consumedTokens; + } + + expect(totalConsumed).toBe(callCount); }); }); - describe("circuit breaker states", () => { + describe("circuit breaker — open (during cooldown)", () => { it("blocks completely when circuit is open during cooldown", () => { const store = createRedisStore(); const now = 1_000_000; @@ -357,8 +393,10 @@ describe("admit.lua", () => { const { retryAfterMs } = runAdmit(store, { now, cooldownMs: 120_000 }); expect(retryAfterMs).toBe(110_000); }); + }); - it("uses probeRateLimit when half-open (after cooldown)", () => { + describe("circuit breaker — half-open (after cooldown)", () => { + it("uses probeRateLimit when half-open", () => { const store = createRedisStore(); const now = 1_000_000; const switchedAt = now - 130_000; @@ -407,11 +445,39 @@ describe("admit.lua", () => { expect(Number(epHash.get("bucket_tokens"))).toBe(0); }); - it("uses recovery ramp when closed during recovery period", () => { + it("gives 1 probe token when half-open with no prior bucket state", () => { const store = createRedisStore(); - const switchedAt = 1_000_000; - const recoveryPeriodMs = 600_000; - const now = switchedAt + recoveryPeriodMs / 2; + const now = 1_000_000; + store.set( + "ep:t1", + new Map([ + ["is_open", "1"], + ["switched_at", (now - 130_000).toString()], + ]), + ); + + const { consumedTokens, effectiveRate, reason } = runAdmit(store, { + now, + cooldownMs: 120_000, + probeRateLimit: 1 / 60, + }); + + expect(consumedTokens).toBe(1); + expect(reason).toBe("some_allowed"); + expect(effectiveRate).toBeCloseTo(1 / 60, 5); + }); + }); + + describe("circuit breaker — recovery ramp", () => { + const targetRateLimit = 10; + const probeRateLimit = 1 / 60; + const recoveryPeriodMs = 
600_000; + const switchedAt = 1_000_000; + + it("uses probeRate at recovery start (progress=0)", () => { + const store = createRedisStore(); + const progress = 0; + const now = switchedAt + recoveryPeriodMs * progress; store.set( "ep:t1", @@ -425,19 +491,44 @@ describe("admit.lua", () => { const { effectiveRate } = runAdmit(store, { now, - targetRateLimit: 10, + targetRateLimit, recoveryPeriodMs, + probeRateLimit, }); - const probeRate = defaultArgs.probeRateLimit; - const expectedRate = probeRate + 0.5 * (10 - probeRate); + + expect(effectiveRate).toBeCloseTo(probeRateLimit, 5); + }); + + it("uses recovery ramp at midpoint (progress=0.5)", () => { + const store = createRedisStore(); + const progress = 0.5; + const now = switchedAt + recoveryPeriodMs * progress; + + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["switched_at", switchedAt.toString()], + ["bucket_tokens", "0"], + ["bucket_refilled_at", "0"], + ]), + ); + + const { effectiveRate } = runAdmit(store, { + now, + targetRateLimit, + recoveryPeriodMs, + probeRateLimit, + }); + const expectedRate = + probeRateLimit + progress * (targetRateLimit - probeRateLimit); expect(effectiveRate).toBeCloseTo(expectedRate, 5); }); - it("uses full rate when closed and past recovery period", () => { + it("uses full targetRate at recovery boundary (progress=1)", () => { const store = createRedisStore(); - const switchedAt = 100_000; - const recoveryPeriodMs = 600_000; - const now = switchedAt + recoveryPeriodMs + 1; + const progress = 1; + const now = switchedAt + recoveryPeriodMs * progress; store.set( "ep:t1", @@ -451,93 +542,152 @@ describe("admit.lua", () => { const { effectiveRate } = runAdmit(store, { now, - targetRateLimit: 10, + targetRateLimit, recoveryPeriodMs, + probeRateLimit, }); - expect(effectiveRate).toBe(10); + + expect(effectiveRate).toBe(targetRateLimit); }); - }); - describe("state persistence", () => { - it("persists bucket_tokens and bucket_refilled_at", () => { + it("uses full rate when 
closed and past recovery period", () => { const store = createRedisStore(); - const now = 1_000_000; + const now = switchedAt + recoveryPeriodMs + 1; + store.set( "ep:t1", new Map([ ["is_open", "0"], - ["bucket_tokens", "5"], - ["bucket_refilled_at", now.toString()], - ["switched_at", "0"], + ["switched_at", switchedAt.toString()], + ["bucket_tokens", "0"], + ["bucket_refilled_at", "0"], ]), ); - runAdmit(store, { now, targetBatchSize: 2 }); + const { effectiveRate } = runAdmit(store, { + now, + targetRateLimit, + recoveryPeriodMs, + }); + expect(effectiveRate).toBe(targetRateLimit); + }); + }); - const epHash = store.get("ep:t1")!; - expect(Number(epHash.get("bucket_tokens"))).toBe(3); + describe("circuit breaker disabled", () => { + it("uses full targetRateLimit on a fresh endpoint (no cautious probe startup without CB)", () => { + const store = createRedisStore(); + const now = 1_000_000; + + const { effectiveRate } = runAdmit(store, { + now, + targetRateLimit: 10, + cbEnabled: false, + }); + + expect(effectiveRate).toBe(10); }); - it("does not write any fields when circuit_open early return", () => { + it("applies initial values on fresh endpoint so first call has no tokens", () => { const store = createRedisStore(); - runAdmit(store, { - now: 10_000, + const now = 1_000_000; + + const { consumedTokens, effectiveRate, reason } = runAdmit(store, { + now, + targetRateLimit: 10, + cbEnabled: false, }); - expect(store.has("ep:t1")).toBe(false); + expect(effectiveRate).toBe(10); + expect(consumedTokens).toBe(0); + expect(reason).toBe("rate_limited"); }); - it("does not write sampling or circuit fields on half-open path", () => { + it("ignores is_open state", () => { const store = createRedisStore(); - runAdmit(store, { - now: 200_000, + const now = 1_000_000; + store.set( + "ep:t1", + new Map([ + ["is_open", "1"], + ["switched_at", now.toString()], + ["bucket_tokens", "5"], + ["bucket_refilled_at", now.toString()], + ]), + ); + + const { consumedTokens, 
effectiveRate, reason } = runAdmit(store, { + now, + targetRateLimit: 10, + cbEnabled: false, }); - const epHash = store.get("ep:t1")!; - expect(epHash.has("bucket_tokens")).toBe(true); - expect(epHash.has("bucket_refilled_at")).toBe(true); - expect(epHash.has("cur_attempts")).toBe(false); - expect(epHash.has("cur_failures")).toBe(false); - expect(epHash.has("sample_till")).toBe(false); - expect(epHash.has("is_open")).toBe(false); - expect(epHash.has("switched_at")).toBe(false); + expect(effectiveRate).toBe(10); + expect(consumedTokens).toBe(1); + expect(reason).toBe("some_allowed"); }); - it("isolates state between targets", () => { + it("does not zero bucket tokens when is_open", () => { const store = createRedisStore(); + const now = 1_000_000; store.set( - "ep:target-a", + "ep:t1", new Map([ - ["is_open", "0"], + ["is_open", "1"], + ["switched_at", now.toString()], ["bucket_tokens", "5"], - ["bucket_refilled_at", "10000"], + ["bucket_refilled_at", now.toString()], ]), ); + + const { consumedTokens } = runAdmit(store, { + now, + targetRateLimit: 10, + cbEnabled: false, + targetBatchSize: 3, + }); + + expect(consumedTokens).toBe(3); + }); + + it("never returns circuit_open", () => { + const store = createRedisStore(); + const now = 1_000_000; store.set( - "ep:target-b", + "ep:t1", new Map([ - ["is_open", "0"], - ["bucket_tokens", "3"], - ["bucket_refilled_at", "10000"], + ["is_open", "1"], + ["switched_at", (now - 10_000).toString()], + ["bucket_tokens", "0"], + ["bucket_refilled_at", now.toString()], ]), ); - runAdmit(store, { now: 10_000 }, "target-a"); - runAdmit(store, { now: 10_000 }, "target-b"); + const { reason } = runAdmit(store, { + now, + cooldownMs: 120_000, + cbEnabled: false, + }); - expect(store.has("ep:target-a")).toBe(true); - expect(store.has("ep:target-b")).toBe(true); + expect(reason).not.toBe("circuit_open"); }); - }); - describe("circuit breaker disabled (cbEnabled = false)", () => { - it("uses full targetRateLimit on a fresh endpoint with 
no prior state", () => { + it("uses full rate during recovery period", () => { const store = createRedisStore(); const now = 1_000_000; + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["switched_at", (now - 1000).toString()], + ["bucket_tokens", "0"], + ["bucket_refilled_at", "0"], + ]), + ); const { effectiveRate } = runAdmit(store, { now, targetRateLimit: 10, + recoveryPeriodMs: 600_000, cbEnabled: false, }); @@ -567,21 +717,6 @@ describe("admit.lua", () => { expect(withCbEnabled.reason).toBe("some_allowed"); }); - it("applies initial values on fresh endpoint so first call has no tokens", () => { - const store = createRedisStore(); - const now = 1_000_000; - - const { consumedTokens, effectiveRate, reason } = runAdmit(store, { - now, - targetRateLimit: 10, - cbEnabled: false, - }); - - expect(effectiveRate).toBe(10); - expect(consumedTokens).toBe(0); - expect(reason).toBe("rate_limited"); - }); - it("generates tokens at full rate after initial contact", () => { const store = createRedisStore(); @@ -599,74 +734,77 @@ describe("admit.lua", () => { expect(consumedTokens).toBe(1); expect(reason).toBe("some_allowed"); }); + }); - it("ignores is_open state when CB is disabled", () => { + describe("state persistence", () => { + it("persists bucket_tokens and bucket_refilled_at", () => { const store = createRedisStore(); const now = 1_000_000; store.set( "ep:t1", new Map([ - ["is_open", "1"], - ["switched_at", now.toString()], + ["is_open", "0"], ["bucket_tokens", "5"], ["bucket_refilled_at", now.toString()], + ["switched_at", "0"], ]), ); - const { consumedTokens, effectiveRate, reason } = runAdmit(store, { - now, - targetRateLimit: 10, - cbEnabled: false, - }); + runAdmit(store, { now, targetBatchSize: 2 }); - expect(effectiveRate).toBe(10); - expect(consumedTokens).toBe(1); - expect(reason).toBe("some_allowed"); + const epHash = store.get("ep:t1")!; + expect(Number(epHash.get("bucket_tokens"))).toBe(3); }); - it("does not zero bucket tokens when is_open and 
CB disabled", () => { + it("does not write any fields when circuit_open early return", () => { const store = createRedisStore(); - const now = 1_000_000; - store.set( - "ep:t1", - new Map([ - ["is_open", "1"], - ["switched_at", now.toString()], - ["bucket_tokens", "5"], - ["bucket_refilled_at", now.toString()], - ]), - ); + runAdmit(store, { + now: 10_000, + }); - const { consumedTokens } = runAdmit(store, { - now, - targetRateLimit: 10, - cbEnabled: false, - targetBatchSize: 3, + expect(store.has("ep:t1")).toBe(false); + }); + + it("does not write sampling or circuit fields on half-open path", () => { + const store = createRedisStore(); + runAdmit(store, { + now: 200_000, }); - expect(consumedTokens).toBe(3); + const epHash = store.get("ep:t1")!; + expect(epHash.has("bucket_tokens")).toBe(true); + expect(epHash.has("bucket_refilled_at")).toBe(true); + expect(epHash.has("cur_attempts")).toBe(false); + expect(epHash.has("cur_failures")).toBe(false); + expect(epHash.has("sample_till")).toBe(false); + expect(epHash.has("is_open")).toBe(false); + expect(epHash.has("switched_at")).toBe(false); }); - it("never returns circuit_open when CB is disabled", () => { + it("isolates state between targets", () => { const store = createRedisStore(); - const now = 1_000_000; store.set( - "ep:t1", + "ep:target-a", new Map([ - ["is_open", "1"], - ["switched_at", (now - 10_000).toString()], - ["bucket_tokens", "0"], - ["bucket_refilled_at", now.toString()], + ["is_open", "0"], + ["bucket_tokens", "5"], + ["bucket_refilled_at", "10000"], + ]), + ); + store.set( + "ep:target-b", + new Map([ + ["is_open", "0"], + ["bucket_tokens", "3"], + ["bucket_refilled_at", "10000"], ]), ); - const { reason } = runAdmit(store, { - now, - cooldownMs: 120_000, - cbEnabled: false, - }); + runAdmit(store, { now: 10_000 }, "target-a"); + runAdmit(store, { now: 10_000 }, "target-b"); - expect(reason).not.toBe("circuit_open"); + expect(store.has("ep:target-a")).toBe(true); + 
expect(store.has("ep:target-b")).toBe(true); }); }); }); diff --git a/lambdas/https-client-lambda/src/__tests__/record-result-lua.test.ts b/lambdas/https-client-lambda/src/__tests__/record-result-lua.test.ts index cde2a653..6d2e13f3 100644 --- a/lambdas/https-client-lambda/src/__tests__/record-result-lua.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/record-result-lua.test.ts @@ -55,7 +55,7 @@ function runRecordResult( } describe("record-result.lua", () => { - describe("success recording", () => { + describe("recording attempts and failures", () => { it("returns closed state for a successful batch with no state change", () => { const store = createRedisStore(); store.set( @@ -93,9 +93,7 @@ describe("record-result.lua", () => { expect(epHash.get("cur_attempts")).toBe("3"); expect(epHash.get("cur_failures")).toBe("0"); }); - }); - describe("failure recording", () => { it("increments both cur_attempts and cur_failures", () => { const store = createRedisStore(); store.set( @@ -114,28 +112,6 @@ describe("record-result.lua", () => { expect(epHash.get("cur_failures")).toBe("1"); }); - it("returns closed state for failures below threshold", () => { - const store = createRedisStore(); - store.set( - "ep:t1", - new Map([ - ["is_open", "0"], - ["switched_at", "0"], - ["sample_till", "9999999999"], - ]), - ); - - const [circuitState, circuitSwitched] = runRecordResult(store, { - consumedTokens: 1, - processingFailures: 1, - }); - - expect(circuitState).toBe("closed"); - expect(circuitSwitched).toBe(0); - }); - }); - - describe("recording guard — fully open", () => { it("does not record attempts/failures when circuit is fully open", () => { const store = createRedisStore(); const now = 1_000_000; @@ -188,6 +164,70 @@ describe("record-result.lua", () => { expect(circuitState).toBe("open"); expect(circuitSwitched).toBe(0); }); + + it("treats a fresh endpoint as fully open during cooldown and does not record", () => { + const store = createRedisStore(); + const now = 
10_000; + + const [circuitState, circuitSwitched] = runRecordResult(store, { + now, + cooldownPeriodMs: 120_000, + consumedTokens: 5, + processingFailures: 3, + }); + + expect(circuitState).toBe("open"); + expect(circuitSwitched).toBe(0); + + const epHash = store.get("ep:t1")!; + expect(epHash.get("cur_attempts")).toBe("0"); + expect(epHash.get("cur_failures")).toBe("0"); + }); + + it("treats a fresh endpoint as half-open when now exceeds cooldown", () => { + const store = createRedisStore(); + const now = 1_000_000; + + const [circuitState, circuitSwitched] = runRecordResult(store, { + now, + cooldownPeriodMs: 120_000, + consumedTokens: 1, + processingFailures: 0, + minAttempts: 5, + }); + + expect(circuitState).toBe("closed_recovery"); + expect(circuitSwitched).toBe(1); + }); + + it("accumulates across multiple calls within the same sample window", () => { + const store = createRedisStore(); + const now = 1_000_000; + + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["switched_at", "0"], + ["sample_till", "9999999999"], + ]), + ); + + runRecordResult(store, { + now, + consumedTokens: 3, + processingFailures: 1, + }); + runRecordResult(store, { + now: now + 1000, + consumedTokens: 2, + processingFailures: 0, + }); + + const epHash = store.get("ep:t1")!; + expect(epHash.get("cur_attempts")).toBe("5"); + expect(epHash.get("cur_failures")).toBe("1"); + }); }); describe("circuit opening", () => { @@ -233,6 +273,27 @@ describe("record-result.lua", () => { expect(circuitSwitched).toBe(0); }); + it("does not open circuit when failure rate equals threshold exactly", () => { + const store = createRedisStore(); + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["switched_at", "0"], + ["sample_till", "9999999999"], + ]), + ); + + const [circuitState, circuitSwitched] = runRecordResult(store, { + consumedTokens: 10, + processingFailures: 3, + minAttempts: 5, + failureThreshold: 0.3, + }); + expect(circuitState).toBe("closed"); + 
expect(circuitSwitched).toBe(0); + }); + it("sets is_open and switched_at on open", () => { const store = createRedisStore(); const now = 1_000_000; @@ -287,13 +348,51 @@ describe("record-result.lua", () => { expect(epHash.get("prev_attempts")).toBe("0"); expect(Number(epHash.get("sample_till"))).toBe(now + samplePeriodMs); }); + + it("resets cooldown when a half-open probe succeeds but accumulated failures still exceed threshold", () => { + const store = createRedisStore(); + const now = 1_000_000; + const cooldownPeriodMs = 120_000; + const switchedAt = now - cooldownPeriodMs - 10_000; // past cooldown → half-open + + store.set( + "ep:t1", + new Map([ + ["is_open", "1"], + ["switched_at", switchedAt.toString()], + ["sample_till", (now + 300_000).toString()], + ["prev_attempts", "20"], + ["prev_failures", "20"], + ["cur_attempts", "0"], + ["cur_failures", "0"], + ]), + ); + + const [circuitState, circuitSwitched] = runRecordResult(store, { + now, + cooldownPeriodMs, + consumedTokens: 1, + processingFailures: 0, // probe succeeds + minAttempts: 5, + failureThreshold: 0.3, + }); + + expect(circuitState).toBe("open"); + expect(circuitSwitched).toBe(1); + + const epHash = store.get("ep:t1")!; + expect(Number(epHash.get("switched_at"))).toBe(now); // fresh cooldown + expect(epHash.get("cur_attempts")).toBe("0"); + expect(epHash.get("prev_attempts")).toBe("0"); + }); }); - describe("circuit closing — half-open with successes", () => { + describe("circuit closing", () => { it("closes circuit when half-open and batch has successes", () => { const store = createRedisStore(); const now = 1_000_000; - const switchedAt = now - 130_000; + const cooldownPeriodMs = 120_000; + const switchedAt = now - cooldownPeriodMs - 10_000; // past cooldown → half-open store.set( "ep:t1", @@ -304,11 +403,14 @@ describe("record-result.lua", () => { ]), ); + const consumedTokens = 5; + const processingFailures = 1; // 4 successes out of 5 (rate 0.2 < 0.3 threshold, no re-open) + const 
[circuitState, circuitSwitched] = runRecordResult(store, { now, - cooldownPeriodMs: 120_000, - consumedTokens: 1, - processingFailures: 0, + cooldownPeriodMs, + consumedTokens, + processingFailures, }); expect(circuitState).toBe("closed_recovery"); @@ -343,9 +445,59 @@ describe("record-result.lua", () => { expect(circuitState).toBe("open_half"); expect(circuitSwitched).toBe(0); }); + + it("does not close when processingFailures exceeds consumedTokens", () => { + const store = createRedisStore(); + const now = 1_000_000; + const switchedAt = now - 130_000; + + store.set( + "ep:t1", + new Map([ + ["is_open", "1"], + ["switched_at", switchedAt.toString()], + ["sample_till", "9999999999"], + ]), + ); + + const [circuitState, circuitSwitched] = runRecordResult(store, { + now, + cooldownPeriodMs: 120_000, + consumedTokens: 1, + processingFailures: 2, + }); + + expect(circuitState).toBe("open_half"); + expect(circuitSwitched).toBe(0); + }); + + it("reports closed when past recovery period", () => { + const store = createRedisStore(); + const now = 1_000_000; + const switchedAt = now - 700_000; + + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["switched_at", switchedAt.toString()], + ["sample_till", "9999999999"], + ]), + ); + + const [circuitState, circuitSwitched] = runRecordResult(store, { + now, + recoveryPeriodMs: 600_000, + consumedTokens: 1, + processingFailures: 0, + }); + + expect(circuitState).toBe("closed"); + expect(circuitSwitched).toBe(0); + }); }); - describe("sliding window management", () => { + describe("sliding window", () => { it("promotes current to previous when sampleTill expires", () => { const store = createRedisStore(); const now = 1_000_000; @@ -402,11 +554,37 @@ describe("record-result.lua", () => { expect(Number(epHash.get("sample_till"))).toBe(now + samplePeriodMs); }); + it("does not promote when sampleTill equals now exactly", () => { + const store = createRedisStore(); + const now = 1_000_000; + + store.set( + "ep:t1", + new 
Map([ + ["is_open", "0"], + ["switched_at", "0"], + ["sample_till", now.toString()], + ["cur_attempts", "10"], + ["cur_failures", "3"], + ["prev_attempts", "0"], + ["prev_failures", "0"], + ]), + ); + + runRecordResult(store, { now, consumedTokens: 1, processingFailures: 0 }); + + const epHash = store.get("ep:t1")!; + expect(epHash.get("prev_attempts")).toBe("0"); + expect(epHash.get("cur_attempts")).toBe("11"); + }); + it("interpolates using weight from sampleTill", () => { const store = createRedisStore(); const samplePeriodMs = 300_000; const now = 1_000_000; - const sampleTill = now + samplePeriodMs; + const sampleTill = now + samplePeriodMs / 2; // weight = 0.5 + const failureThreshold = 0.3; + const minAttempts = 10; store.set( "ep:t1", @@ -414,22 +592,23 @@ describe("record-result.lua", () => { ["is_open", "0"], ["switched_at", "0"], ["sample_till", sampleTill.toString()], - ["prev_attempts", "10"], - ["prev_failures", "10"], + ["prev_attempts", "20"], + ["prev_failures", "20"], // 100% failure in previous window ]), ); - // weight = (sampleTill - now) / samplePeriodMs = 1.0 - // interpolated attempts = 10 * 1.0 + 5 = 15 (>= minAttempts 5) - // interpolated failures = 10 * 1.0 + 5 = 15 - // failure rate = 15/15 = 1.0 > 0.3 → opens + // Current batch: 1 attempt, 0 failures — not enough alone to trip + // weight = (sampleTill - now) / samplePeriodMs = 0.5 + // interpolated attempts = 20 * 0.5 + 1 = 11 (>= minAttempts 10) + // interpolated failures = 20 * 0.5 + 0 = 10 + // failure rate = 10/11 ≈ 0.91 > 0.3 → opens const [circuitState, circuitSwitched] = runRecordResult(store, { now, samplePeriodMs, - consumedTokens: 5, - processingFailures: 5, - minAttempts: 5, - failureThreshold: 0.3, + consumedTokens: 1, + processingFailures: 0, + minAttempts, + failureThreshold, }); expect(circuitState).toBe("open"); expect(circuitSwitched).toBe(1); @@ -456,5 +635,60 @@ describe("record-result.lua", () => { expect(epHash.has("prev_failures")).toBe(true); 
expect(epHash.has("sample_till")).toBe(true); }); + + it("does not write is_open or switched_at when circuit does not switch", () => { + const store = createRedisStore(); + store.set( + "ep:t1", + new Map([ + ["is_open", "0"], + ["switched_at", "500"], + ["sample_till", "9999999999"], + ]), + ); + + runRecordResult(store, { consumedTokens: 1, processingFailures: 0 }); + + const epHash = store.get("ep:t1")!; + expect(epHash.get("switched_at")).toBe("500"); + }); + + it("isolates state between different targets", () => { + const store = createRedisStore(); + store.set( + "ep:target-a", + new Map([ + ["is_open", "0"], + ["switched_at", "0"], + ["sample_till", "9999999999"], + ]), + ); + store.set( + "ep:target-b", + new Map([ + ["is_open", "0"], + ["switched_at", "0"], + ["sample_till", "9999999999"], + ]), + ); + + runRecordResult( + store, + { consumedTokens: 5, processingFailures: 0 }, + "target-a", + ); + runRecordResult( + store, + { consumedTokens: 3, processingFailures: 2 }, + "target-b", + ); + + const hashA = store.get("ep:target-a")!; + const hashB = store.get("ep:target-b")!; + expect(hashA.get("cur_attempts")).toBe("5"); + expect(hashA.get("cur_failures")).toBe("0"); + expect(hashB.get("cur_attempts")).toBe("3"); + expect(hashB.get("cur_failures")).toBe("2"); + }); }); }); From fda5541dc9d8813629258bf16f9a2bc40e34c8bc Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Thu, 7 May 2026 09:48:14 +0100 Subject: [PATCH 52/65] Fix unit test resolves deployment context with defaults' when AWS_PROFILE is set locally --- .../src/__tests__/aws.test.ts | 27 ++++++++++++------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/tools/client-subscriptions-management/src/__tests__/aws.test.ts b/tools/client-subscriptions-management/src/__tests__/aws.test.ts index d1af5109..528acc31 100644 --- a/tools/client-subscriptions-management/src/__tests__/aws.test.ts +++ b/tools/client-subscriptions-management/src/__tests__/aws.test.ts @@ -136,16 +136,25 @@ describe("aws", 
() => { }); it("resolves deployment context with defaults", async () => { - const ctx = await resolveDeploymentContext({ - environment: "staging", - }); + const savedProfile = process.env.AWS_PROFILE; + delete process.env.AWS_PROFILE; - expect(ctx).toEqual({ - environment: "staging", - region: "eu-west-2", - accountId: "123456789012", - profile: undefined, - }); + try { + const ctx = await resolveDeploymentContext({ + environment: "staging", + }); + + expect(ctx).toEqual({ + environment: "staging", + region: "eu-west-2", + accountId: "123456789012", + profile: undefined, + }); + } finally { + if (savedProfile !== undefined) { + process.env.AWS_PROFILE = savedProfile; + } + } }); it("throws when environment is missing from deployment context", async () => { From 508a1a63f9d462004bb077e1d5d595b038b81374 Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Thu, 7 May 2026 11:03:59 +0100 Subject: [PATCH 53/65] Refactor metrics test to avoid repetition --- .../src/__tests__/delivery-metrics.test.ts | 136 ++++++------------ 1 file changed, 42 insertions(+), 94 deletions(-) diff --git a/lambdas/https-client-lambda/src/__tests__/delivery-metrics.test.ts b/lambdas/https-client-lambda/src/__tests__/delivery-metrics.test.ts index 10212c29..e5619199 100644 --- a/lambdas/https-client-lambda/src/__tests__/delivery-metrics.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/delivery-metrics.test.ts @@ -14,13 +14,17 @@ describe("delivery-metrics", () => { flush: jest.fn().mockResolvedValue(undefined), }; - beforeEach(() => { + let mod: typeof import("services/delivery-metrics"); + + beforeEach(async () => { jest.resetModules(); jest.clearAllMocks(); mockCreateMetricsLogger.mockReturnValue(mockMetrics); process.env.METRICS_NAMESPACE = "TestNamespace"; process.env.ENVIRONMENT = "test"; process.env.CLIENT_ID = "client-1"; + // @ts-expect-error -- modulePaths resolves at runtime + mod = await import("services/delivery-metrics"); }); afterEach(() => { @@ -31,6 +35,7 @@ 
describe("delivery-metrics", () => { it("throws when METRICS_NAMESPACE is not set", async () => { delete process.env.METRICS_NAMESPACE; + jest.resetModules(); // @ts-expect-error -- modulePaths resolves at runtime const { emitDeliveryAttempt } = await import("services/delivery-metrics"); @@ -41,6 +46,7 @@ describe("delivery-metrics", () => { it("throws when ENVIRONMENT is not set", async () => { delete process.env.ENVIRONMENT; + jest.resetModules(); // @ts-expect-error -- modulePaths resolves at runtime const { emitDeliveryAttempt } = await import("services/delivery-metrics"); @@ -51,6 +57,7 @@ describe("delivery-metrics", () => { it("throws when CLIENT_ID is not set", async () => { delete process.env.CLIENT_ID; + jest.resetModules(); // @ts-expect-error -- modulePaths resolves at runtime const { emitDeliveryAttempt } = await import("services/delivery-metrics"); @@ -59,11 +66,8 @@ describe("delivery-metrics", () => { ); }); - it("creates metrics logger with correct namespace and dimensions", async () => { - // @ts-expect-error -- modulePaths resolves at runtime - const { emitDeliveryAttempt } = await import("services/delivery-metrics"); - - emitDeliveryAttempt("t-1"); + it("creates metrics logger with correct namespace and dimensions", () => { + mod.emitDeliveryAttempt("t-1"); expect(mockMetrics.setNamespace).toHaveBeenCalledWith("TestNamespace"); expect(mockMetrics.setDimensions).toHaveBeenCalledWith({ @@ -72,22 +76,15 @@ describe("delivery-metrics", () => { }); }); - it("caches the metrics logger on subsequent calls", async () => { - // @ts-expect-error -- modulePaths resolves at runtime - const mod = await import("services/delivery-metrics"); - const { emitDeliveryAttempt, emitDeliverySuccess } = mod; - - emitDeliveryAttempt("t-1"); - emitDeliverySuccess("t-1"); + it("caches the metrics logger on subsequent calls", () => { + mod.emitDeliveryAttempt("t-1"); + mod.emitDeliverySuccess("t-1"); expect(mockCreateMetricsLogger).toHaveBeenCalledTimes(1); }); - 
it("emitDeliveryAttempt emits correct metric", async () => { - // @ts-expect-error -- modulePaths resolves at runtime - const { emitDeliveryAttempt } = await import("services/delivery-metrics"); - - emitDeliveryAttempt("target-42"); + it("emitDeliveryAttempt emits correct metric", () => { + mod.emitDeliveryAttempt("target-42"); expect(mockMetrics.setProperty).toHaveBeenCalledWith( "targetId", @@ -101,11 +98,8 @@ describe("delivery-metrics", () => { ); }); - it("emitDeliverySuccess emits correct metric", async () => { - // @ts-expect-error -- modulePaths resolves at runtime - const { emitDeliverySuccess } = await import("services/delivery-metrics"); - - emitDeliverySuccess("target-42"); + it("emitDeliverySuccess emits correct metric", () => { + mod.emitDeliverySuccess("target-42"); expect(mockMetrics.putMetric).toHaveBeenCalledWith( "DeliverySuccess", @@ -115,11 +109,8 @@ describe("delivery-metrics", () => { ); }); - it("emitDeliveryFailure emits correct metric", async () => { - // @ts-expect-error -- modulePaths resolves at runtime - const { emitDeliveryFailure } = await import("services/delivery-metrics"); - - emitDeliveryFailure("target-42"); + it("emitDeliveryFailure emits correct metric", () => { + mod.emitDeliveryFailure("target-42"); expect(mockMetrics.putMetric).toHaveBeenCalledWith( "DeliveryFailure", @@ -129,12 +120,8 @@ describe("delivery-metrics", () => { ); }); - it("emitDeliveryPermanentFailure emits correct metric", async () => { - // @ts-expect-error -- modulePaths resolves at runtime - const mod = await import("services/delivery-metrics"); - const { emitDeliveryPermanentFailure } = mod; - - emitDeliveryPermanentFailure("target-42"); + it("emitDeliveryPermanentFailure emits correct metric", () => { + mod.emitDeliveryPermanentFailure("target-42"); expect(mockMetrics.putMetric).toHaveBeenCalledWith( "DeliveryPermanentFailure", @@ -144,12 +131,8 @@ describe("delivery-metrics", () => { ); }); - it("emitCircuitBreakerOpen emits correct metric", async () 
=> { - // @ts-expect-error -- modulePaths resolves at runtime - const mod = await import("services/delivery-metrics"); - const { emitCircuitBreakerOpen } = mod; - - emitCircuitBreakerOpen("target-42"); + it("emitCircuitBreakerOpen emits correct metric", () => { + mod.emitCircuitBreakerOpen("target-42"); expect(mockMetrics.putMetric).toHaveBeenCalledWith( "CircuitBreakerOpen", @@ -159,12 +142,8 @@ describe("delivery-metrics", () => { ); }); - it("emitServerRateLimited emits correct metric", async () => { - // @ts-expect-error -- modulePaths resolves at runtime - const mod = await import("services/delivery-metrics"); - const { emitServerRateLimited } = mod; - - emitServerRateLimited("target-42"); + it("emitServerRateLimited emits correct metric", () => { + mod.emitServerRateLimited("target-42"); expect(mockMetrics.putMetric).toHaveBeenCalledWith( "DeliveryServerRateLimited", @@ -174,12 +153,8 @@ describe("delivery-metrics", () => { ); }); - it("emitCircuitBreakerClosed emits correct metric", async () => { - // @ts-expect-error -- modulePaths resolves at runtime - const mod = await import("services/delivery-metrics"); - const { emitCircuitBreakerClosed } = mod; - - emitCircuitBreakerClosed("target-42"); + it("emitCircuitBreakerClosed emits correct metric", () => { + mod.emitCircuitBreakerClosed("target-42"); expect(mockMetrics.putMetric).toHaveBeenCalledWith( "CircuitBreakerClosed", @@ -189,12 +164,8 @@ describe("delivery-metrics", () => { ); }); - it("emitRetryWindowExhausted emits correct metric", async () => { - // @ts-expect-error -- modulePaths resolves at runtime - const mod = await import("services/delivery-metrics"); - const { emitRetryWindowExhausted } = mod; - - emitRetryWindowExhausted("target-42"); + it("emitRetryWindowExhausted emits correct metric", () => { + mod.emitRetryWindowExhausted("target-42"); expect(mockMetrics.putMetric).toHaveBeenCalledWith( "DeliveryRetryWindowExhausted", @@ -204,12 +175,8 @@ describe("delivery-metrics", () => { ); }); - 
it("emitClientRateLimited emits correct metric", async () => { - // @ts-expect-error -- modulePaths resolves at runtime - const mod = await import("services/delivery-metrics"); - const { emitClientRateLimited } = mod; - - emitClientRateLimited("target-42", 3); + it("emitClientRateLimited emits correct metric", () => { + mod.emitClientRateLimited("target-42", 3); expect(mockMetrics.setProperty).toHaveBeenCalledWith( "targetId", @@ -223,12 +190,8 @@ describe("delivery-metrics", () => { ); }); - it("emitCircuitBlocked emits correct metric", async () => { - // @ts-expect-error -- modulePaths resolves at runtime - const mod = await import("services/delivery-metrics"); - const { emitCircuitBlocked } = mod; - - emitCircuitBlocked("target-42", 2); + it("emitCircuitBlocked emits correct metric", () => { + mod.emitCircuitBlocked("target-42", 2); expect(mockMetrics.setProperty).toHaveBeenCalledWith( "targetId", @@ -242,12 +205,8 @@ describe("delivery-metrics", () => { ); }); - it("emitDeliveryDuration emits correct metric", async () => { - // @ts-expect-error -- modulePaths resolves at runtime - const mod = await import("services/delivery-metrics"); - const { emitDeliveryDuration } = mod; - - emitDeliveryDuration("target-42", 250); + it("emitDeliveryDuration emits correct metric", () => { + mod.emitDeliveryDuration("target-42", 250); expect(mockMetrics.putMetric).toHaveBeenCalledWith( "DeliveryDurationMs", @@ -258,33 +217,22 @@ describe("delivery-metrics", () => { }); it("flushMetrics calls flush on the instance", async () => { - // @ts-expect-error -- modulePaths resolves at runtime - const mod = await import("services/delivery-metrics"); - const { emitDeliveryAttempt, flushMetrics } = mod; - - emitDeliveryAttempt("t-1"); - await flushMetrics(); + mod.emitDeliveryAttempt("t-1"); + await mod.flushMetrics(); expect(mockMetrics.flush).toHaveBeenCalled(); }); it("flushMetrics does nothing when no metrics instance exists", async () => { - // @ts-expect-error -- modulePaths 
resolves at runtime - const { flushMetrics } = await import("services/delivery-metrics"); - - await flushMetrics(); + await mod.flushMetrics(); expect(mockMetrics.flush).not.toHaveBeenCalled(); }); - it("resetMetrics clears the cached instance", async () => { - // @ts-expect-error -- modulePaths resolves at runtime - const mod = await import("services/delivery-metrics"); - const { emitDeliveryAttempt, resetMetrics } = mod; - - emitDeliveryAttempt("t-1"); - resetMetrics(); - emitDeliveryAttempt("t-2"); + it("resetMetrics clears the cached instance", () => { + mod.emitDeliveryAttempt("t-1"); + mod.resetMetrics(); + mod.emitDeliveryAttempt("t-2"); expect(mockCreateMetricsLogger).toHaveBeenCalledTimes(2); }); From b1b0d88cee5ee5770dbcfffb6877eae8315f4b5b Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Thu, 7 May 2026 11:39:32 +0100 Subject: [PATCH 54/65] Refactor consistency in http lambda env var test overrides --- .../src/__tests__/applications-map.test.ts | 15 ++++++------- .../src/__tests__/config-loader.test.ts | 22 ++++++++----------- .../src/__tests__/dlq-sender.test.ts | 10 ++++----- .../src/__tests__/handler.test.ts | 10 ++++----- .../src/__tests__/sqs-visibility.test.ts | 11 +++++----- 5 files changed, 32 insertions(+), 36 deletions(-) diff --git a/lambdas/https-client-lambda/src/__tests__/applications-map.test.ts b/lambdas/https-client-lambda/src/__tests__/applications-map.test.ts index 5c5b2f63..23193ad9 100644 --- a/lambdas/https-client-lambda/src/__tests__/applications-map.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/applications-map.test.ts @@ -23,9 +23,6 @@ jest.mock("@nhs-notify-client-callbacks/logger", () => ({ }, })); -process.env.APPLICATIONS_MAP_S3_BUCKET = "test-bucket"; -process.env.APPLICATIONS_MAP_S3_KEY = "test/applications-map.json"; - const mockBody = (content: string) => ({ transformToString: jest.fn().mockResolvedValue(content), @@ -35,6 +32,13 @@ describe("getApplicationId", () => { beforeEach(() => { mockSend.mockReset(); 
resetCache(); + process.env.APPLICATIONS_MAP_S3_BUCKET = "test-bucket"; + process.env.APPLICATIONS_MAP_S3_KEY = "test/applications-map.json"; + }); + + afterEach(() => { + delete process.env.APPLICATIONS_MAP_S3_BUCKET; + delete process.env.APPLICATIONS_MAP_S3_KEY; }); it("returns correct applicationId for a known clientId", async () => { @@ -71,8 +75,6 @@ describe("getApplicationId", () => { it("throws when env vars are not set", async () => { let getFn: typeof getApplicationId; - const savedBucket = process.env.APPLICATIONS_MAP_S3_BUCKET; - const savedKey = process.env.APPLICATIONS_MAP_S3_KEY; delete process.env.APPLICATIONS_MAP_S3_BUCKET; delete process.env.APPLICATIONS_MAP_S3_KEY; @@ -84,9 +86,6 @@ describe("getApplicationId", () => { await expect(getFn!("client-1")).rejects.toThrow( "APPLICATIONS_MAP_S3_BUCKET and APPLICATIONS_MAP_S3_KEY are required", ); - - process.env.APPLICATIONS_MAP_S3_BUCKET = savedBucket; - process.env.APPLICATIONS_MAP_S3_KEY = savedKey; }); it("throws when S3 object body is empty", async () => { diff --git a/lambdas/https-client-lambda/src/__tests__/config-loader.test.ts b/lambdas/https-client-lambda/src/__tests__/config-loader.test.ts index fc880afa..f635aaa0 100644 --- a/lambdas/https-client-lambda/src/__tests__/config-loader.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/config-loader.test.ts @@ -23,10 +23,6 @@ jest.mock("@nhs-notify-client-callbacks/logger", () => ({ }, })); -process.env.CLIENT_SUBSCRIPTION_CONFIG_BUCKET = "test-bucket"; -process.env.CLIENT_SUBSCRIPTION_CONFIG_PREFIX = "client_subscriptions/"; -process.env.CLIENT_SUBSCRIPTION_CACHE_TTL_SECONDS = "1"; - const VALID_TARGET = { targetId: "target-1", type: "API" as const, @@ -52,6 +48,15 @@ describe("loadTargetConfig", () => { beforeEach(() => { mockS3Send.mockReset(); resetCache(); + process.env.CLIENT_SUBSCRIPTION_CONFIG_BUCKET = "test-bucket"; + process.env.CLIENT_SUBSCRIPTION_CONFIG_PREFIX = "client_subscriptions/"; + 
process.env.CLIENT_SUBSCRIPTION_CACHE_TTL_SECONDS = "1"; + }); + + afterEach(() => { + delete process.env.CLIENT_SUBSCRIPTION_CONFIG_BUCKET; + delete process.env.CLIENT_SUBSCRIPTION_CONFIG_PREFIX; + delete process.env.CLIENT_SUBSCRIPTION_CACHE_TTL_SECONDS; }); it("parses valid S3 config and returns the matching target", async () => { @@ -121,7 +126,6 @@ describe("loadTargetConfig", () => { it("throws when CLIENT_SUBSCRIPTION_CONFIG_BUCKET is not set", async () => { let loadFn: typeof loadTargetConfig; - const saved = process.env.CLIENT_SUBSCRIPTION_CONFIG_BUCKET; delete process.env.CLIENT_SUBSCRIPTION_CONFIG_BUCKET; jest.isolateModules(() => { @@ -132,8 +136,6 @@ describe("loadTargetConfig", () => { await expect(loadFn!("client-1", "target-1")).rejects.toThrow( "CLIENT_SUBSCRIPTION_CONFIG_BUCKET is required", ); - - process.env.CLIENT_SUBSCRIPTION_CONFIG_BUCKET = saved; }); it("throws when S3 response body is empty", async () => { @@ -161,7 +163,6 @@ describe("loadTargetConfig", () => { }); it("uses default prefix when CLIENT_SUBSCRIPTION_CONFIG_PREFIX is not set", async () => { - const saved = process.env.CLIENT_SUBSCRIPTION_CONFIG_PREFIX; delete process.env.CLIENT_SUBSCRIPTION_CONFIG_PREFIX; resetCache(); mockS3Send.mockResolvedValue(makeS3Response(VALID_CONFIG)); @@ -170,12 +171,9 @@ describe("loadTargetConfig", () => { const command: GetObjectCommand = mockS3Send.mock.calls[0][0]; expect(command.input.Key).toBe("client_subscriptions/client-1.json"); - - process.env.CLIENT_SUBSCRIPTION_CONFIG_PREFIX = saved; }); it("uses default TTL when CLIENT_SUBSCRIPTION_CACHE_TTL_SECONDS is not set", async () => { - const saved = process.env.CLIENT_SUBSCRIPTION_CACHE_TTL_SECONDS; delete process.env.CLIENT_SUBSCRIPTION_CACHE_TTL_SECONDS; resetCache(); mockS3Send.mockResolvedValue(makeS3Response(VALID_CONFIG)); @@ -183,8 +181,6 @@ describe("loadTargetConfig", () => { const result = await loadTargetConfig("client-1", "target-1"); expect(result).toEqual(VALID_TARGET); - - 
process.env.CLIENT_SUBSCRIPTION_CACHE_TTL_SECONDS = saved; }); it("throws when loadClientConfig resolves to undefined", async () => { diff --git a/lambdas/https-client-lambda/src/__tests__/dlq-sender.test.ts b/lambdas/https-client-lambda/src/__tests__/dlq-sender.test.ts index 692e41c9..cd2286e5 100644 --- a/lambdas/https-client-lambda/src/__tests__/dlq-sender.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/dlq-sender.test.ts @@ -13,11 +13,14 @@ jest.mock("@aws-sdk/client-sqs", () => { }; }); -process.env.DLQ_URL = "https://sqs.eu-west-2.invalid/123456789/test-dlq"; - describe("sendToDlq", () => { beforeEach(() => { mockSend.mockReset(); + process.env.DLQ_URL = "https://sqs.eu-west-2.invalid/123456789/test-dlq"; + }); + + afterEach(() => { + delete process.env.DLQ_URL; }); it("sends SendMessageCommand with correct QueueUrl and MessageBody", async () => { @@ -42,7 +45,6 @@ describe("sendToDlq", () => { it("throws when DLQ_URL is not set", async () => { let sendFn: typeof sendToDlq; - const saved = process.env.DLQ_URL; delete process.env.DLQ_URL; jest.isolateModules(() => { @@ -51,8 +53,6 @@ describe("sendToDlq", () => { }); await expect(sendFn!("body")).rejects.toThrow("DLQ_URL is required"); - - process.env.DLQ_URL = saved; }); it("includes ERROR_CODE and ERROR_MESSAGE for HTTP error with JSON body", async () => { diff --git a/lambdas/https-client-lambda/src/__tests__/handler.test.ts b/lambdas/https-client-lambda/src/__tests__/handler.test.ts index bb7f1a4e..01d87b42 100644 --- a/lambdas/https-client-lambda/src/__tests__/handler.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/handler.test.ts @@ -93,13 +93,12 @@ jest.mock("services/delivery-metrics", () => ({ resetMetrics: jest.fn(), })); -process.env.CLIENT_ID = "client-1"; - describe("processRecords", () => { const mockAgent = {}; beforeEach(() => { jest.clearAllMocks(); + process.env.CLIENT_ID = "client-1"; mockLoadTargetConfig.mockResolvedValue(DEFAULT_TARGET); 
mockGetApplicationId.mockResolvedValue("app-id-1"); mockSignPayload.mockReturnValue("signature-abc"); @@ -121,6 +120,10 @@ describe("processRecords", () => { mockRecordResult.mockResolvedValue({ ok: true, state: "ok" }); }); + afterEach(() => { + delete process.env.CLIENT_ID; + }); + it("returns no failures on successful delivery", async () => { const failures = await processRecords([makeRecord()]); @@ -340,7 +343,6 @@ describe("processRecords", () => { }); it("sends all records to DLQ when CLIENT_ID is not set", async () => { - const saved = process.env.CLIENT_ID; delete process.env.CLIENT_ID; const record1 = makeRecord({ messageId: "msg-1" }); @@ -353,8 +355,6 @@ describe("processRecords", () => { expect(mockSendToDlq).toHaveBeenCalledWith(record2.body); expect(mockSendToDlq).toHaveBeenCalledTimes(2); expect(mockDeliverPayload).not.toHaveBeenCalled(); - - process.env.CLIENT_ID = saved; }); it("sends to DLQ when retry window is exhausted", async () => { diff --git a/lambdas/https-client-lambda/src/__tests__/sqs-visibility.test.ts b/lambdas/https-client-lambda/src/__tests__/sqs-visibility.test.ts index 9e0d9e54..2bdc0474 100644 --- a/lambdas/https-client-lambda/src/__tests__/sqs-visibility.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/sqs-visibility.test.ts @@ -13,11 +13,15 @@ jest.mock("@aws-sdk/client-sqs", () => { }; }); -process.env.QUEUE_URL = "https://sqs.eu-west-2.invalid/123456789/test-queue"; - describe("changeVisibility", () => { beforeEach(() => { mockSend.mockReset(); + process.env.QUEUE_URL = + "https://sqs.eu-west-2.invalid/123456789/test-queue"; + }); + + afterEach(() => { + delete process.env.QUEUE_URL; }); it("sends ChangeMessageVisibilityCommand with correct params", async () => { @@ -54,7 +58,6 @@ describe("changeVisibility", () => { it("throws when QUEUE_URL is not set", async () => { let changeFn: typeof changeVisibility; - const saved = process.env.QUEUE_URL; delete process.env.QUEUE_URL; jest.isolateModules(() => { @@ -65,7 +68,5 @@ 
describe("changeVisibility", () => { await expect(changeFn!("receipt-handle-1", 30)).rejects.toThrow( "QUEUE_URL is required", ); - - process.env.QUEUE_URL = saved; }); }); From ec872a6710fd088556fc626357c549975b06f807 Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Thu, 7 May 2026 12:24:45 +0100 Subject: [PATCH 55/65] Refactor http lambda handler admission denied duplication and added DeliveryOutcome type --- .../src/__tests__/handler.test.ts | 28 +--- .../src/__tests__/retry-policy.test.ts | 86 +++++----- lambdas/https-client-lambda/src/handler.ts | 150 ++++++++++-------- .../src/services/client-rate-limited-error.ts | 1 - .../src/services/delivery/retry-policy.ts | 7 +- .../src/services/visibility-managed-error.ts | 1 - 6 files changed, 134 insertions(+), 139 deletions(-) delete mode 100644 lambdas/https-client-lambda/src/services/client-rate-limited-error.ts delete mode 100644 lambdas/https-client-lambda/src/services/visibility-managed-error.ts diff --git a/lambdas/https-client-lambda/src/__tests__/handler.test.ts b/lambdas/https-client-lambda/src/__tests__/handler.test.ts index 01d87b42..f11a667f 100644 --- a/lambdas/https-client-lambda/src/__tests__/handler.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/handler.test.ts @@ -3,8 +3,6 @@ import { DEFAULT_TARGET, makeRecord, } from "__tests__/fixtures/handler-fixtures"; -import { ClientRateLimitedError } from "services/client-rate-limited-error"; -import { VisibilityManagedError } from "services/visibility-managed-error"; jest.mock("@nhs-notify-client-callbacks/logger", () => ({ logger: { @@ -108,9 +106,7 @@ describe("processRecords", () => { mockChangeVisibility.mockResolvedValue(undefined); mockJitteredBackoff.mockReturnValue(5); mockIsWindowExhausted.mockReturnValue(false); - mockHandleRateLimitedRecord.mockRejectedValue( - new ClientRateLimitedError("Rate limited — requeue"), - ); + mockHandleRateLimitedRecord.mockResolvedValue("retry"); mockGetRedisClient.mockResolvedValue({}); 
mockAdmit.mockResolvedValue({ allowed: true, @@ -267,19 +263,7 @@ describe("processRecords", () => { expect(mockChangeVisibility).not.toHaveBeenCalled(); }); - it("retries VisibilityManagedError without DLQ", async () => { - mockDeliverPayload.mockRejectedValue( - new VisibilityManagedError("Rate limited — requeue"), - ); - - const failures = await processRecords([makeRecord()]); - - expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); - expect(mockSendToDlq).not.toHaveBeenCalled(); - expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 5); - }); - - it("does not override visibility on ClientRateLimitedError", async () => { + it("does not override visibility on rate-limited requeue", async () => { mockDeliverPayload.mockResolvedValue({ outcome: "rate_limited", retryAfterHeader: "120", @@ -287,7 +271,7 @@ describe("processRecords", () => { }); mockHandleRateLimitedRecord.mockImplementation(async () => { await mockChangeVisibility("receipt-1", 120); - throw new ClientRateLimitedError("Rate limited — requeue"); + return "retry"; }); await processRecords([makeRecord()]); @@ -401,7 +385,7 @@ describe("processRecords", () => { outcome: "rate_limited", retryAfterHeader: "99999", }); - mockHandleRateLimitedRecord.mockResolvedValue(undefined); + mockHandleRateLimitedRecord.mockResolvedValue("dlq"); const failures = await processRecords([makeRecord()]); @@ -769,7 +753,7 @@ describe("processRecords", () => { statusCode: 429, retryAfterHeader: "60", }); - mockHandleRateLimitedRecord.mockResolvedValueOnce(undefined); + mockHandleRateLimitedRecord.mockResolvedValueOnce("dlq"); const failures = await processRecords([makeRecord()]); @@ -849,7 +833,7 @@ describe("processRecords", () => { batchSize: 2, deliveredCount: 1, dlqCount: 1, - failureCount: 0, + retryCount: 0, }), ); }); diff --git a/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts b/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts index 12656385..468a02a2 100644 --- 
a/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/retry-policy.test.ts @@ -6,7 +6,6 @@ import { jitteredBackoffSeconds, parseRetryAfter, } from "services/delivery/retry-policy"; -import { ClientRateLimitedError } from "services/client-rate-limited-error"; const mockSendToDlq = jest.fn(); jest.mock("services/dlq-sender", () => ({ @@ -142,24 +141,29 @@ describe("handleRateLimitedRecord", () => { describe("429 under retry period", () => { it("retries with Retry-After delay when within SQS max visibility", async () => { - await expect( - handleRateLimitedRecord(makeRecord(), "client-1", "target-1", "120", 1), - ).rejects.toThrow(ClientRateLimitedError); + const result = await handleRateLimitedRecord( + makeRecord(), + "client-1", + "target-1", + "120", + 1, + ); + expect(result).toBe("retry"); expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 120); expect(mockSendToDlq).not.toHaveBeenCalled(); }); it("retries with jittered backoff when no Retry-After header provided", async () => { - await expect( - handleRateLimitedRecord( - makeRecord(), - "client-1", - "target-1", - undefined, - 1, - ), - ).rejects.toThrow(ClientRateLimitedError); + const result = await handleRateLimitedRecord( + makeRecord(), + "client-1", + "target-1", + undefined, + 1, + ); + + expect(result).toBe("retry"); expect(mockChangeVisibility).toHaveBeenCalled(); const [, delaySec] = mockChangeVisibility.mock.calls[0] as [ @@ -174,31 +178,29 @@ describe("handleRateLimitedRecord", () => { describe("429 over retry period but under SQS limit", () => { it("retries when Retry-After exceeds max retry duration but is within SQS max visibility", async () => { - await expect( - handleRateLimitedRecord( - makeRecord(), - "client-1", - "target-1", - "10000", - 1, - ), - ).rejects.toThrow(ClientRateLimitedError); + const result = await handleRateLimitedRecord( + makeRecord(), + "client-1", + "target-1", + "10000", + 1, + ); + 
expect(result).toBe("retry"); expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 10_000); expect(mockSendToDlq).not.toHaveBeenCalled(); }); it("retries at SQS max visibility boundary (43200s)", async () => { - await expect( - handleRateLimitedRecord( - makeRecord(), - "client-1", - "target-1", - "43200", - 1, - ), - ).rejects.toThrow(ClientRateLimitedError); + const result = await handleRateLimitedRecord( + makeRecord(), + "client-1", + "target-1", + "43200", + 1, + ); + expect(result).toBe("retry"); expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 43_200); expect(mockSendToDlq).not.toHaveBeenCalled(); }); @@ -206,7 +208,7 @@ describe("handleRateLimitedRecord", () => { describe("429 over SQS limit", () => { it("sends to DLQ when Retry-After exceeds SQS max visibility", async () => { - await handleRateLimitedRecord( + const result = await handleRateLimitedRecord( makeRecord(), "client-1", "target-1", @@ -214,21 +216,21 @@ describe("handleRateLimitedRecord", () => { 1, ); + expect(result).toBe("dlq"); expect(mockSendToDlq).toHaveBeenCalledWith(makeRecord().body); expect(mockChangeVisibility).not.toHaveBeenCalled(); }); - it("does not throw when sending to DLQ", async () => { - await expect( - handleRateLimitedRecord( - makeRecord(), - "client-1", - "target-1", - "43201", - 1, - ), - ).resolves.toBeUndefined(); + it("returns dlq without throwing", async () => { + const result = await handleRateLimitedRecord( + makeRecord(), + "client-1", + "target-1", + "43201", + 1, + ); + expect(result).toBe("dlq"); expect(mockSendToDlq).toHaveBeenCalledWith(makeRecord().body); }); }); diff --git a/lambdas/https-client-lambda/src/handler.ts b/lambdas/https-client-lambda/src/handler.ts index 956cfa79..522513a1 100644 --- a/lambdas/https-client-lambda/src/handler.ts +++ b/lambdas/https-client-lambda/src/handler.ts @@ -19,8 +19,6 @@ import { isWindowExhausted, jitteredBackoffSeconds, } from "services/delivery/retry-policy"; -import { ClientRateLimitedError } 
from "services/client-rate-limited-error"; -import { VisibilityManagedError } from "services/visibility-managed-error"; import { type EndpointGateConfig, admit, @@ -74,12 +72,20 @@ type CallbackDeliveryMessage = { targetId: string; }; +type DeliveryOutcome = "success" | "retry" | "dlq"; + type TargetBatch = { targetId: string; records: SQSRecord[]; messages: CallbackDeliveryMessage[]; }; +type TargetBatchResult = { + failures: SQSBatchItemFailure[]; + deliveredCount: number; + dlqCount: number; +}; + function groupByTarget(records: SQSRecord[]): TargetBatch[] { const groups = new Map< string, @@ -118,7 +124,7 @@ async function deliverRecord( target: Awaited>, applicationId: string, clientId: string, -): Promise<{ success: boolean; dlq: boolean }> { +): Promise { const correlationId = extractCorrelationId(message); const receiveCount = Number(record.attributes.ApproximateReceiveCount); @@ -142,7 +148,7 @@ async function deliverRecord( if (isWindowExhausted(firstReceivedMs, maxRetryDurationMs)) { recordRetryWindowExhausted(clientId, message.targetId, correlationId); await sendToDlq(record.body); - return { success: true, dlq: true }; + return "dlq"; } const agent = await buildAgent(target); @@ -166,7 +172,7 @@ async function deliverRecord( if (result.outcome === OUTCOME_SUCCESS) { recordDeliverySuccess(clientId, message.targetId, correlationId); - return { success: true, dlq: false }; + return "success"; } if (result.outcome === OUTCOME_PERMANENT_FAILURE) { @@ -178,19 +184,18 @@ async function deliverRecord( correlationId, ); await sendToDlq(record.body, result); - return { success: true, dlq: true }; + return "dlq"; } if (result.outcome === OUTCOME_RATE_LIMITED) { recordDeliveryRateLimited(clientId, message.targetId, correlationId); - await handleRateLimitedRecord( + return handleRateLimitedRecord( record, clientId, message.targetId, result.retryAfterHeader, receiveCount, ); - return { success: true, dlq: false }; } const backoffSec = 
jitteredBackoffSeconds(receiveCount); @@ -203,14 +208,31 @@ async function deliverRecord( correlationId, ); await changeVisibility(record.receiptHandle, backoffSec); - return { success: false, dlq: false }; + return "retry"; } -type TargetBatchResult = { - failures: SQSBatchItemFailure[]; - deliveredCount: number; - dlqCount: number; -}; +async function denyRecords( + records: SQSRecord[], + messages: CallbackDeliveryMessage[], + clientId: string, + targetId: string, + reason: string, + delaySec: (receiveCount: number) => number, +): Promise { + recordAdmissionDenied( + clientId, + targetId, + reason, + messages.map((m) => extractCorrelationId(m)), + ); + const failures: SQSBatchItemFailure[] = []; + for (const record of records) { + const receiveCount = Number(record.attributes.ApproximateReceiveCount); + await changeVisibility(record.receiptHandle, delaySec(receiveCount)); + failures.push({ itemIdentifier: record.messageId }); + } + return failures; +} async function handleBatchDenied( batch: TargetBatch, @@ -219,18 +241,15 @@ async function handleBatchDenied( retryAfterMs: number, ): Promise { const baseDelaySec = Math.max(1, Math.ceil(retryAfterMs / 1000)); - const correlationIds = batch.messages.map((m) => extractCorrelationId(m)); - recordAdmissionDenied(clientId, batch.targetId, reason, correlationIds); - const failures: SQSBatchItemFailure[] = []; - for (const record of batch.records) { - const receiveCount = Number(record.attributes.ApproximateReceiveCount); - const delaySec = Math.min( - receiveCount * baseDelaySec, - SQS_MAX_VISIBILITY_TIMEOUT_SEC, - ); - await changeVisibility(record.receiptHandle, delaySec); - failures.push({ itemIdentifier: record.messageId }); - } + const failures = await denyRecords( + batch.records, + batch.messages, + clientId, + batch.targetId, + reason, + (receiveCount) => + Math.min(receiveCount * baseDelaySec, SQS_MAX_VISIBILITY_TIMEOUT_SEC), + ); return { failures, deliveredCount: 0, dlqCount: 0 }; } @@ -275,7 +294,6 @@ async 
function processTargetBatch( const applicationId = await getApplicationId(clientId); const failures: SQSBatchItemFailure[] = []; - let processingFailures = 0; const admittedPairs = admitted.map( (record, i): { record: SQSRecord; message: CallbackDeliveryMessage } => ({ @@ -289,7 +307,7 @@ async function processTargetBatch( async ({ message, record, - }): Promise<{ record: SQSRecord; success: boolean; dlq: boolean }> => { + }): Promise<{ outcome: DeliveryOutcome; record: SQSRecord }> => { try { const outcome = await deliverRecord( record, @@ -298,7 +316,7 @@ async function processTargetBatch( applicationId, clientId, ); - return { record, success: outcome.success, dlq: outcome.dlq }; + return { outcome, record }; } catch (error) { const correlationId = extractCorrelationId(message); logger.error("Failed to process record", { @@ -307,40 +325,39 @@ async function processTargetBatch( err: error, }); - if (error instanceof ClientRateLimitedError) { - return { record, success: false, dlq: false }; - } - - if (error instanceof VisibilityManagedError) { - const receiveCount = Number( - record.attributes.ApproximateReceiveCount, - ); - await changeVisibility( - record.receiptHandle, - jitteredBackoffSeconds(receiveCount), - ); - return { record, success: false, dlq: false }; - } - await sendToDlq(record.body); - return { record, success: true, dlq: true }; + return { outcome: "dlq", record }; } }, { concurrency: concurrencyLimit }, ); - for (const { record, success } of deliveryResults) { - if (!success) { - processingFailures += 1; - failures.push({ itemIdentifier: record.messageId }); + let processingFailures = 0; + let deliveredCount = 0; + let dlqCount = 0; + + for (const { outcome, record } of deliveryResults) { + switch (outcome) { + case "retry": { + processingFailures += 1; + failures.push({ itemIdentifier: record.messageId }); + break; + } + case "dlq": { + dlqCount += 1; + break; + } + case "success": { + deliveredCount += 1; + break; + } + default: { + const 
exhaustiveCheck: never = outcome; + throw new Error(`Unexpected outcome: ${exhaustiveCheck}`); + } } } - const deliveredCount = deliveryResults.filter( - (r) => r.success && !r.dlq, - ).length; - const dlqCount = deliveryResults.filter((r) => r.dlq).length; - if (cbEnabled && consumedTokens > 0) { const cbOutcome = await recordResult( redis, @@ -362,21 +379,16 @@ async function processTargetBatch( if (rejected.length > 0) { const rejectedMessages = batch.messages.slice(consumedTokens); - const rejectedCorrelationIds = rejectedMessages.map((m) => - extractCorrelationId(m), + failures.push( + ...(await denyRecords( + rejected, + rejectedMessages, + clientId, + batch.targetId, + "rate_limited", + (n) => n, + )), ); - recordAdmissionDenied( - clientId, - batch.targetId, - "rate_limited", - rejectedCorrelationIds, - ); - for (const record of rejected) { - const receiveCount = Number(record.attributes.ApproximateReceiveCount); - const delaySec = receiveCount * 1; - await changeVisibility(record.receiptHandle, delaySec); - failures.push({ itemIdentifier: record.messageId }); - } } return { failures, deliveredCount, dlqCount }; @@ -423,7 +435,7 @@ export async function processRecords( batchSize: records.length, deliveredCount: totalDelivered, dlqCount: totalDlq, - failureCount: allFailures.length, + retryCount: allFailures.length, }); await flushMetrics(); diff --git a/lambdas/https-client-lambda/src/services/client-rate-limited-error.ts b/lambdas/https-client-lambda/src/services/client-rate-limited-error.ts deleted file mode 100644 index f1968e7b..00000000 --- a/lambdas/https-client-lambda/src/services/client-rate-limited-error.ts +++ /dev/null @@ -1 +0,0 @@ -export class ClientRateLimitedError extends Error {} diff --git a/lambdas/https-client-lambda/src/services/delivery/retry-policy.ts b/lambdas/https-client-lambda/src/services/delivery/retry-policy.ts index 6baa692e..533a3953 100644 --- a/lambdas/https-client-lambda/src/services/delivery/retry-policy.ts +++ 
b/lambdas/https-client-lambda/src/services/delivery/retry-policy.ts @@ -2,7 +2,6 @@ import type { SQSRecord } from "aws-lambda"; import { logger } from "@nhs-notify-client-callbacks/logger"; import { sendToDlq } from "services/dlq-sender"; import { changeVisibility } from "services/sqs-visibility"; -import { ClientRateLimitedError } from "services/client-rate-limited-error"; const BACKOFF_CAP_SECONDS = 300; const SQS_MAX_VISIBILITY_SECONDS = 43_200; @@ -50,7 +49,7 @@ export async function handleRateLimitedRecord( targetId: string, retryAfterHeader: string | undefined, receiveCount: number, -): Promise { +): Promise<"retry" | "dlq"> { const retryAfterSeconds = retryAfterHeader ? parseRetryAfter(retryAfterHeader) : 0; @@ -62,7 +61,7 @@ export async function handleRateLimitedRecord( retryAfterSeconds, }); await sendToDlq(record.body); - return; + return "dlq"; } const delaySec = @@ -76,5 +75,5 @@ export async function handleRateLimitedRecord( delaySec, }); await changeVisibility(record.receiptHandle, delaySec); - throw new ClientRateLimitedError("Rate limited — requeue"); + return "retry"; } diff --git a/lambdas/https-client-lambda/src/services/visibility-managed-error.ts b/lambdas/https-client-lambda/src/services/visibility-managed-error.ts deleted file mode 100644 index 403c2162..00000000 --- a/lambdas/https-client-lambda/src/services/visibility-managed-error.ts +++ /dev/null @@ -1 +0,0 @@ -export class VisibilityManagedError extends Error {} From e996fbe24cc5fce185a11b38be895c5f782a498d Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Thu, 7 May 2026 14:08:43 +0100 Subject: [PATCH 56/65] Refactor http lambda handler - improved readability of processTargetBatch --- lambdas/https-client-lambda/src/handler.ts | 160 ++++++++++++--------- 1 file changed, 95 insertions(+), 65 deletions(-) diff --git a/lambdas/https-client-lambda/src/handler.ts b/lambdas/https-client-lambda/src/handler.ts index 522513a1..6310fe82 100644 --- a/lambdas/https-client-lambda/src/handler.ts +++ 
b/lambdas/https-client-lambda/src/handler.ts @@ -253,57 +253,27 @@ async function handleBatchDenied( return { failures, deliveredCount: 0, dlqCount: 0 }; } -async function processTargetBatch( - batch: TargetBatch, - redis: RedisClientType, +async function deliverAdmittedRecords( + records: SQSRecord[], + messages: CallbackDeliveryMessage[], + target: Awaited>, + applicationId: string, clientId: string, concurrencyLimit: number, -): Promise { - const target = await loadTargetConfig(clientId, batch.targetId); - const cbEnabled = - target.delivery?.circuitBreaker?.enabled ?? Boolean(target.delivery); - - const targetBurstCapacity = Math.min( - target.invocationRateLimit * BURST_MULTIPLIER, - MAX_BURST_CAPACITY, - ); - - const gateResult = await admit( - redis, - batch.targetId, - target.invocationRateLimit, - cbEnabled, - batch.records.length, - { ...gateConfig, burstCapacity: targetBurstCapacity }, - ); - - if (!gateResult.allowed) { - return handleBatchDenied( - batch, - clientId, - gateResult.reason, - gateResult.retryAfterMs, - ); - } - - const { consumedTokens } = gateResult; - const admitted = batch.records.slice(0, consumedTokens); - const rejected = batch.records.slice(consumedTokens); - const admittedMessages = batch.messages.slice(0, consumedTokens); - - const applicationId = await getApplicationId(clientId); - - const failures: SQSBatchItemFailure[] = []; - - const admittedPairs = admitted.map( - (record, i): { record: SQSRecord; message: CallbackDeliveryMessage } => ({ +): Promise<{ + deliveredCount: number; + dlqCount: number; + failures: SQSBatchItemFailure[]; +}> { + const pairs = records.map( + (record, i): { message: CallbackDeliveryMessage; record: SQSRecord } => ({ + message: messages[i], // eslint-disable-line security/detect-object-injection -- numeric .map() index record, - message: admittedMessages[i], // eslint-disable-line security/detect-object-injection -- i is the numeric index from .map(), not user input }), ); - const deliveryResults = 
await pMap( - admittedPairs, + const results = await pMap( + pairs, async ({ message, record, @@ -332,14 +302,13 @@ async function processTargetBatch( { concurrency: concurrencyLimit }, ); - let processingFailures = 0; + const failures: SQSBatchItemFailure[] = []; let deliveredCount = 0; let dlqCount = 0; - for (const { outcome, record } of deliveryResults) { + for (const { outcome, record } of results) { switch (outcome) { case "retry": { - processingFailures += 1; failures.push({ itemIdentifier: record.messageId }); break; } @@ -358,31 +327,92 @@ async function processTargetBatch( } } + return { deliveredCount, dlqCount, failures }; +} + +async function reportCircuitBreaker( + redis: RedisClientType, + targetId: string, + consumedTokens: number, + processingFailures: number, +): Promise { + const cbOutcome = await recordResult( + redis, + targetId, + consumedTokens, + processingFailures, + gateConfig, + ); + if (cbOutcome.circuitSwitched && cbOutcome.circuitState === "open") { + recordCircuitBreakerOpen(targetId); + } + if ( + cbOutcome.circuitSwitched && + cbOutcome.circuitState === "closed_recovery" + ) { + recordCircuitBreakerClosed(targetId); + } +} + +async function processTargetBatch( + batch: TargetBatch, + redis: RedisClientType, + clientId: string, + concurrencyLimit: number, +): Promise { + const target = await loadTargetConfig(clientId, batch.targetId); + const cbEnabled = + target.delivery?.circuitBreaker?.enabled ?? 
Boolean(target.delivery); + + const targetBurstCapacity = Math.min( + target.invocationRateLimit * BURST_MULTIPLIER, + MAX_BURST_CAPACITY, + ); + + const gateResult = await admit( + redis, + batch.targetId, + target.invocationRateLimit, + cbEnabled, + batch.records.length, + { ...gateConfig, burstCapacity: targetBurstCapacity }, + ); + + if (!gateResult.allowed) { + return handleBatchDenied( + batch, + clientId, + gateResult.reason, + gateResult.retryAfterMs, + ); + } + + const { consumedTokens } = gateResult; + const applicationId = await getApplicationId(clientId); + + const { deliveredCount, dlqCount, failures } = await deliverAdmittedRecords( + batch.records.slice(0, consumedTokens), + batch.messages.slice(0, consumedTokens), + target, + applicationId, + clientId, + concurrencyLimit, + ); + if (cbEnabled && consumedTokens > 0) { - const cbOutcome = await recordResult( + await reportCircuitBreaker( redis, batch.targetId, consumedTokens, - processingFailures, - gateConfig, + failures.length, ); - if (cbOutcome.circuitSwitched && cbOutcome.circuitState === "open") { - recordCircuitBreakerOpen(batch.targetId); - } - if ( - cbOutcome.circuitSwitched && - cbOutcome.circuitState === "closed_recovery" - ) { - recordCircuitBreakerClosed(batch.targetId); - } } - if (rejected.length > 0) { - const rejectedMessages = batch.messages.slice(consumedTokens); + if (consumedTokens < batch.records.length) { failures.push( ...(await denyRecords( - rejected, - rejectedMessages, + batch.records.slice(consumedTokens), + batch.messages.slice(consumedTokens), clientId, batch.targetId, "rate_limited", @@ -391,7 +421,7 @@ async function processTargetBatch( ); } - return { failures, deliveredCount, dlqCount }; + return { deliveredCount, dlqCount, failures }; } export async function processRecords( From 740f513047d215b322b721a714f5e2cc1215e376 Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Thu, 7 May 2026 14:36:03 +0100 Subject: [PATCH 57/65] Improve http lambda error resilience --- 
.../src/__tests__/handler.test.ts | 82 ++++++++++++++ lambdas/https-client-lambda/src/handler.ts | 101 +++++++++++++----- 2 files changed, 156 insertions(+), 27 deletions(-) diff --git a/lambdas/https-client-lambda/src/__tests__/handler.test.ts b/lambdas/https-client-lambda/src/__tests__/handler.test.ts index f11a667f..ab35893f 100644 --- a/lambdas/https-client-lambda/src/__tests__/handler.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/handler.test.ts @@ -853,4 +853,86 @@ describe("processRecords", () => { }), ); }); + + it("sends unparseable records to DLQ and logs error", async () => { + const badRecord = makeRecord({ + messageId: "bad-msg", + body: "not-valid-json{{{", + }); + const goodRecord = makeRecord({ messageId: "good-msg" }); + + const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); + + const result = await processRecords([badRecord, goodRecord]); + + expect(mockSendToDlq).toHaveBeenCalledWith(badRecord.body); + expect(logger.error).toHaveBeenCalledWith( + "Unparseable message body \u2014 sending to DLQ", + expect.objectContaining({ messageId: "bad-msg" }), + ); + expect(result).toHaveLength(0); + expect(logger.info).toHaveBeenCalledWith( + "Batch complete", + expect.objectContaining({ dlqCount: 1, deliveredCount: 1 }), + ); + }); + + it("returns record for retry when DLQ send fails after delivery error", async () => { + mockDeliverPayload.mockRejectedValue(new Error("Connection reset")); + mockSendToDlq.mockRejectedValue(new Error("DLQ unavailable")); + + const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); + + const result = await processRecords([makeRecord()]); + + expect(logger.error).toHaveBeenCalledWith( + "DLQ send also failed \u2014 returning for retry", + expect.objectContaining({ messageId: "msg-1" }), + ); + expect(result).toHaveLength(1); + expect(result[0].itemIdentifier).toBe("msg-1"); + }); + + it("swallows reportCircuitBreaker errors without affecting delivery", async () => { + 
mockRecordResult.mockRejectedValue(new Error("Redis timeout")); + + const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); + + const result = await processRecords([makeRecord()]); + + expect(logger.error).toHaveBeenCalledWith( + "Failed to report circuit breaker result", + expect.objectContaining({ targetId: "target-1" }), + ); + expect(result).toHaveLength(0); + expect(logger.info).toHaveBeenCalledWith( + "Batch complete", + expect.objectContaining({ deliveredCount: 1 }), + ); + }); + + it("sends all batch records to DLQ when processTargetBatch throws", async () => { + mockLoadTargetConfig.mockRejectedValue( + new Error("Config service unavailable"), + ); + + const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); + + const records = [ + makeRecord({ messageId: "msg-1" }), + makeRecord({ messageId: "msg-2" }), + ]; + const result = await processRecords(records); + + expect(logger.error).toHaveBeenCalledWith( + "Target batch failed \u2014 sending all records to DLQ", + expect.objectContaining({ targetId: "target-1" }), + ); + expect(mockSendToDlq).toHaveBeenCalledTimes(2); + expect(result).toHaveLength(0); + expect(logger.info).toHaveBeenCalledWith( + "Batch complete", + expect.objectContaining({ dlqCount: 2 }), + ); + }); }); diff --git a/lambdas/https-client-lambda/src/handler.ts b/lambdas/https-client-lambda/src/handler.ts index 6310fe82..89975f28 100644 --- a/lambdas/https-client-lambda/src/handler.ts +++ b/lambdas/https-client-lambda/src/handler.ts @@ -86,20 +86,33 @@ type TargetBatchResult = { dlqCount: number; }; -function groupByTarget(records: SQSRecord[]): TargetBatch[] { +function groupByTarget( + records: SQSRecord[], + unparseable: SQSRecord[], +): TargetBatch[] { const groups = new Map< string, { records: SQSRecord[]; messages: CallbackDeliveryMessage[] } >(); for (const record of records) { - const message: CallbackDeliveryMessage = JSON.parse(record.body); - const existing = groups.get(message.targetId); - if 
(existing) { - existing.records.push(record); - existing.messages.push(message); - } else { - groups.set(message.targetId, { records: [record], messages: [message] }); + let message: CallbackDeliveryMessage | undefined; + try { + message = JSON.parse(record.body); + } catch { + unparseable.push(record); + } + if (message) { + const existing = groups.get(message.targetId); + if (existing) { + existing.records.push(record); + existing.messages.push(message); + } else { + groups.set(message.targetId, { + records: [record], + messages: [message], + }); + } } } @@ -295,8 +308,16 @@ async function deliverAdmittedRecords( err: error, }); - await sendToDlq(record.body); - return { outcome: "dlq", record }; + try { + await sendToDlq(record.body); + return { outcome: "dlq", record }; + } catch (dlqError) { + logger.error("DLQ send also failed — returning for retry", { + messageId: record.messageId, + err: dlqError, + }); + return { outcome: "retry", record }; + } } }, { concurrency: concurrencyLimit }, @@ -400,12 +421,19 @@ async function processTargetBatch( ); if (cbEnabled && consumedTokens > 0) { - await reportCircuitBreaker( - redis, - batch.targetId, - consumedTokens, - failures.length, - ); + try { + await reportCircuitBreaker( + redis, + batch.targetId, + consumedTokens, + failures.length, + ); + } catch (error) { + logger.error("Failed to report circuit breaker result", { + targetId: batch.targetId, + err: error, + }); + } } if (consumedTokens < batch.records.length) { @@ -443,22 +471,41 @@ export async function processRecords( logger.info("Batch received", { batchSize: records.length }); const redis = await getRedisClient(); - const targetBatches = groupByTarget(records); + const unparseable: SQSRecord[] = []; + const targetBatches = groupByTarget(records, unparseable); + + for (const record of unparseable) { + logger.error("Unparseable message body — sending to DLQ", { + messageId: record.messageId, + }); + await sendToDlq(record.body); + } const allFailures: 
SQSBatchItemFailure[] = []; let totalDelivered = 0; - let totalDlq = 0; + let totalDlq = unparseable.length; for (const batch of targetBatches) { - const batchResult = await processTargetBatch( - batch, - redis, - CLIENT_ID, - concurrencyLimit, - ); - allFailures.push(...batchResult.failures); - totalDelivered += batchResult.deliveredCount; - totalDlq += batchResult.dlqCount; + try { + const batchResult = await processTargetBatch( + batch, + redis, + CLIENT_ID, + concurrencyLimit, + ); + allFailures.push(...batchResult.failures); + totalDelivered += batchResult.deliveredCount; + totalDlq += batchResult.dlqCount; + } catch (error) { + logger.error("Target batch failed — sending all records to DLQ", { + targetId: batch.targetId, + err: error, + }); + for (const record of batch.records) { + await sendToDlq(record.body); + } + totalDlq += batch.records.length; + } } logger.info("Batch complete", { From cbf01fc081c1498a3b1f39d488d7f034e677fda7 Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Thu, 7 May 2026 14:50:01 +0100 Subject: [PATCH 58/65] Fix flawed http lambda tests --- .../src/__tests__/handler.test.ts | 46 +++++++++++-------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/lambdas/https-client-lambda/src/__tests__/handler.test.ts b/lambdas/https-client-lambda/src/__tests__/handler.test.ts index ab35893f..3314342f 100644 --- a/lambdas/https-client-lambda/src/__tests__/handler.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/handler.test.ts @@ -288,9 +288,14 @@ describe("processRecords", () => { effectiveRate: 10, }); - await processRecords([makeRecord()]); + await processRecords([ + makeRecord({ messageId: "msg-1", receiptHandle: "receipt-1" }), + makeRecord({ messageId: "msg-2", receiptHandle: "receipt-2" }), + ]); - expect(mockChangeVisibility).toHaveBeenCalledTimes(1); + expect(mockChangeVisibility).toHaveBeenCalledTimes(2); + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 2); + 
expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-2", 2); }); it("caps visibility delay at SQS maximum (12 hours) for admission-denied batch", async () => { @@ -400,11 +405,17 @@ describe("processRecords", () => { effectiveRate: 10, }); - const failures = await processRecords([makeRecord()]); + const failures = await processRecords([ + makeRecord({ messageId: "msg-1", receiptHandle: "receipt-1" }), + makeRecord({ messageId: "msg-2", receiptHandle: "receipt-2" }), + ]); - expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); - const visibilityDelay = mockChangeVisibility.mock.calls[0]![1] as number; - expect(visibilityDelay).toBe(2); + expect(failures).toEqual([ + { itemIdentifier: "msg-1" }, + { itemIdentifier: "msg-2" }, + ]); + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 2); + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-2", 2); expect(mockSendToDlq).not.toHaveBeenCalled(); expect(mockDeliverPayload).not.toHaveBeenCalled(); }); @@ -417,11 +428,17 @@ describe("processRecords", () => { effectiveRate: 0, }); - const failures = await processRecords([makeRecord()]); + const failures = await processRecords([ + makeRecord({ messageId: "msg-1", receiptHandle: "receipt-1" }), + makeRecord({ messageId: "msg-2", receiptHandle: "receipt-2" }), + ]); - expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); - const visibilityDelay = mockChangeVisibility.mock.calls[0]![1] as number; - expect(visibilityDelay).toBe(30); + expect(failures).toEqual([ + { itemIdentifier: "msg-1" }, + { itemIdentifier: "msg-2" }, + ]); + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 30); + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-2", 30); expect(mockSendToDlq).not.toHaveBeenCalled(); expect(mockDeliverPayload).not.toHaveBeenCalled(); }); @@ -747,14 +764,7 @@ describe("processRecords", () => { ); }); - it("returns no failure when handleRateLimitedRecord resolves without throwing", async () => { - 
mockDeliverPayload.mockResolvedValue({ - outcome: "permanent_failure", - statusCode: 429, - retryAfterHeader: "60", - }); - mockHandleRateLimitedRecord.mockResolvedValueOnce("dlq"); - + it("uses default maxRetryDurationMs when target has no maxRetryDurationSeconds", async () => { const failures = await processRecords([makeRecord()]); expect(failures).toEqual([]); From 0011275b046fd1f947f4c2d0b8253fca7f61079b Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Thu, 7 May 2026 14:58:48 +0100 Subject: [PATCH 59/65] Better grouping on handler tests --- .../src/__tests__/handler.test.ts | 1431 +++++++++-------- 1 file changed, 730 insertions(+), 701 deletions(-) diff --git a/lambdas/https-client-lambda/src/__tests__/handler.test.ts b/lambdas/https-client-lambda/src/__tests__/handler.test.ts index 3314342f..853d6e63 100644 --- a/lambdas/https-client-lambda/src/__tests__/handler.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/handler.test.ts @@ -120,829 +120,858 @@ describe("processRecords", () => { delete process.env.CLIENT_ID; }); - it("returns no failures on successful delivery", async () => { - const failures = await processRecords([makeRecord()]); - - expect(failures).toEqual([]); - expect(mockLoadTargetConfig).toHaveBeenCalledWith("client-1", "target-1"); - expect(mockGetApplicationId).toHaveBeenCalledWith("client-1"); - expect(mockSignPayload).toHaveBeenCalledWith( - "app-id-1", - "secret-key", - expect.objectContaining({ data: expect.any(Array) }), - ); - expect(mockBuildAgent).toHaveBeenCalledWith(DEFAULT_TARGET); - expect(mockDeliverPayload).toHaveBeenCalledWith( - DEFAULT_TARGET, - expect.any(String), - "signature-abc", - mockAgent, - ); - }); - - it("sends permanent failure to DLQ and returns no failure", async () => { - mockDeliverPayload.mockResolvedValue({ outcome: "permanent_failure" }); - - const failures = await processRecords([makeRecord()]); - - expect(failures).toEqual([]); - expect(mockSendToDlq).toHaveBeenCalledWith(makeRecord().body, { - 
outcome: "permanent_failure", - }); - }); - - it("returns failure for transient 5xx errors", async () => { - mockDeliverPayload.mockResolvedValue({ - outcome: "transient_failure", - statusCode: 503, - }); - - const failures = await processRecords([makeRecord()]); - - expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); - }); - - it("returns failure for 429 when handleRateLimitedRecord rejects", async () => { - mockDeliverPayload.mockResolvedValue({ - outcome: "rate_limited", - retryAfterHeader: "60", + describe("delivery outcomes", () => { + it("returns no failures on successful delivery", async () => { + const failures = await processRecords([makeRecord()]); + + expect(failures).toEqual([]); + expect(mockLoadTargetConfig).toHaveBeenCalledWith("client-1", "target-1"); + expect(mockGetApplicationId).toHaveBeenCalledWith("client-1"); + expect(mockSignPayload).toHaveBeenCalledWith( + "app-id-1", + "secret-key", + expect.objectContaining({ data: expect.any(Array) }), + ); + expect(mockBuildAgent).toHaveBeenCalledWith(DEFAULT_TARGET); + expect(mockDeliverPayload).toHaveBeenCalledWith( + DEFAULT_TARGET, + expect.any(String), + "signature-abc", + mockAgent, + ); + }); + + it("sends permanent failure to DLQ and returns no failure", async () => { + mockDeliverPayload.mockResolvedValue({ outcome: "permanent_failure" }); + + const failures = await processRecords([makeRecord()]); + + expect(failures).toEqual([]); + expect(mockSendToDlq).toHaveBeenCalledWith(makeRecord().body, { + outcome: "permanent_failure", + }); }); - const failures = await processRecords([makeRecord()]); - - expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); - expect(mockHandleRateLimitedRecord).toHaveBeenCalledWith( - makeRecord(), - "client-1", - "target-1", - "60", - 1, - ); - }); - - it("processes multiple records in a single target batch", async () => { - const record1 = makeRecord({ messageId: "msg-1" }); - const record2 = makeRecord({ messageId: "msg-2" }); - - mockDeliverPayload - 
.mockResolvedValueOnce({ outcome: "success" }) - .mockResolvedValueOnce({ + it("returns failure for transient 5xx errors", async () => { + mockDeliverPayload.mockResolvedValue({ outcome: "transient_failure", - statusCode: 500, + statusCode: 503, }); - const failures = await processRecords([record1, record2]); + const failures = await processRecords([makeRecord()]); - expect(failures).toEqual([{ itemIdentifier: "msg-2" }]); - expect(mockAdmit).toHaveBeenCalledTimes(1); - }); - - it("delivers only admitted records when consumedTokens is less than batch size", async () => { - const record1 = makeRecord({ - messageId: "msg-1", - receiptHandle: "receipt-1", - }); - const record2 = makeRecord({ - messageId: "msg-2", - receiptHandle: "receipt-2", - }); - const record3 = makeRecord({ - messageId: "msg-3", - receiptHandle: "receipt-3", + expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); }); - mockAdmit.mockResolvedValue({ - allowed: true, - consumedTokens: 1, - effectiveRate: 10, - }); + it("returns failure for 429 when handleRateLimitedRecord rejects", async () => { + mockDeliverPayload.mockResolvedValue({ + outcome: "rate_limited", + retryAfterHeader: "60", + }); - const { recordAdmissionDenied } = jest.requireMock( - "services/delivery-observability", - ); + const failures = await processRecords([makeRecord()]); - const failures = await processRecords([record1, record2, record3]); + expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); + expect(mockHandleRateLimitedRecord).toHaveBeenCalledWith( + makeRecord(), + "client-1", + "target-1", + "60", + 1, + ); + }); - expect(mockDeliverPayload).toHaveBeenCalledTimes(1); - expect(failures).toEqual([ - { itemIdentifier: "msg-2" }, - { itemIdentifier: "msg-3" }, - ]); - expect(recordAdmissionDenied).toHaveBeenCalledWith( - "client-1", - "target-1", - "rate_limited", - ["test-message-id", "test-message-id"], - ); + it("returns no failure when handleRateLimitedRecord resolves (e.g. 
DLQ path)", async () => { + mockDeliverPayload.mockResolvedValue({ + outcome: "rate_limited", + retryAfterHeader: "99999", + }); + mockHandleRateLimitedRecord.mockResolvedValue("dlq"); - expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-2", 1); - expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-3", 1); - }); + const failures = await processRecords([makeRecord()]); - it("an unexpected delivery error does not prevent other records in the batch", async () => { - const record1 = makeRecord({ messageId: "msg-1" }); - const record2 = makeRecord({ messageId: "msg-2" }); + expect(failures).toEqual([]); + }); - mockDeliverPayload - .mockRejectedValueOnce(new Error("Connection reset")) - .mockResolvedValueOnce({ outcome: "success" }); + it("sends to DLQ when retry window is exhausted", async () => { + mockIsWindowExhausted.mockReturnValue(true); - const failures = await processRecords([record1, record2]); + const failures = await processRecords([makeRecord()]); - expect(failures).toEqual([]); - expect(mockSendToDlq).toHaveBeenCalledWith(record1.body); + expect(failures).toEqual([]); + expect(mockSendToDlq).toHaveBeenCalledWith(makeRecord().body); + expect(mockDeliverPayload).not.toHaveBeenCalled(); + }); }); - it("sends unhandled errors to DLQ", async () => { - mockDeliverPayload.mockRejectedValue(new Error("Infrastructure error")); + describe("batch processing", () => { + it("processes multiple records in a single target batch", async () => { + const record1 = makeRecord({ messageId: "msg-1" }); + const record2 = makeRecord({ messageId: "msg-2" }); - const failures = await processRecords([makeRecord()]); + mockDeliverPayload + .mockResolvedValueOnce({ outcome: "success" }) + .mockResolvedValueOnce({ + outcome: "transient_failure", + statusCode: 500, + }); - expect(failures).toEqual([]); - expect(mockSendToDlq).toHaveBeenCalledWith(makeRecord().body); - expect(mockChangeVisibility).not.toHaveBeenCalled(); - }); + const failures = await 
processRecords([record1, record2]); - it("does not override visibility on rate-limited requeue", async () => { - mockDeliverPayload.mockResolvedValue({ - outcome: "rate_limited", - retryAfterHeader: "120", - statusCode: 429, - }); - mockHandleRateLimitedRecord.mockImplementation(async () => { - await mockChangeVisibility("receipt-1", 120); - return "retry"; + expect(failures).toEqual([{ itemIdentifier: "msg-2" }]); + expect(mockAdmit).toHaveBeenCalledTimes(1); }); - await processRecords([makeRecord()]); - - expect(mockChangeVisibility).toHaveBeenCalledTimes(1); - expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 120); - }); - - it("changes visibility once per record for admission-denied batch", async () => { - mockAdmit.mockResolvedValue({ - allowed: false, - reason: "rate_limited", - retryAfterMs: 2000, - effectiveRate: 10, - }); + it("delivers only admitted records when consumedTokens is less than batch size", async () => { + const record1 = makeRecord({ + messageId: "msg-1", + receiptHandle: "receipt-1", + }); + const record2 = makeRecord({ + messageId: "msg-2", + receiptHandle: "receipt-2", + }); + const record3 = makeRecord({ + messageId: "msg-3", + receiptHandle: "receipt-3", + }); - await processRecords([ - makeRecord({ messageId: "msg-1", receiptHandle: "receipt-1" }), - makeRecord({ messageId: "msg-2", receiptHandle: "receipt-2" }), - ]); + mockAdmit.mockResolvedValue({ + allowed: true, + consumedTokens: 1, + effectiveRate: 10, + }); - expect(mockChangeVisibility).toHaveBeenCalledTimes(2); - expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 2); - expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-2", 2); - }); + const { recordAdmissionDenied } = jest.requireMock( + "services/delivery-observability", + ); + + const failures = await processRecords([record1, record2, record3]); + + expect(mockDeliverPayload).toHaveBeenCalledTimes(1); + expect(failures).toEqual([ + { itemIdentifier: "msg-2" }, + { itemIdentifier: "msg-3" }, + 
]); + expect(recordAdmissionDenied).toHaveBeenCalledWith( + "client-1", + "target-1", + "rate_limited", + ["test-message-id", "test-message-id"], + ); + + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-2", 1); + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-3", 1); + }); + + it("groups records by target and processes each batch separately", async () => { + const record1 = makeRecord({ messageId: "msg-1" }); + const record2 = makeRecord({ + messageId: "msg-2", + body: JSON.stringify({ + payload: { + data: [ + { + type: "MessageStatus", + attributes: { messageStatus: "delivered" }, + }, + ], + }, + subscriptionId: "sub-2", + targetId: "target-2", + }), + }); - it("caps visibility delay at SQS maximum (12 hours) for admission-denied batch", async () => { - mockAdmit.mockResolvedValue({ - allowed: false, - reason: "rate_limited", - retryAfterMs: 60_000, - effectiveRate: 10, - }); + const failures = await processRecords([record1, record2]); - const record = makeRecord({ - attributes: { - ApproximateReceiveCount: "1000", - SentTimestamp: "0", - SenderId: "sender", - ApproximateFirstReceiveTimestamp: "0", - }, + expect(failures).toEqual([]); + expect(mockAdmit).toHaveBeenCalledTimes(2); + expect(mockLoadTargetConfig).toHaveBeenCalledWith("client-1", "target-1"); + expect(mockLoadTargetConfig).toHaveBeenCalledWith("client-1", "target-2"); }); - - await processRecords([record]); - - expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 43_200); }); - it("changes visibility once for transient failure", async () => { - mockDeliverPayload.mockResolvedValue({ - outcome: "transient_failure", - statusCode: 503, - }); - - await processRecords([makeRecord()]); + describe("endpoint gate", () => { + it("requeues all records when rate limited by endpoint gate", async () => { + mockAdmit.mockResolvedValue({ + allowed: false, + reason: "rate_limited", + retryAfterMs: 2000, + effectiveRate: 10, + }); - 
expect(mockChangeVisibility).toHaveBeenCalledTimes(1); - }); + const failures = await processRecords([ + makeRecord({ messageId: "msg-1", receiptHandle: "receipt-1" }), + makeRecord({ messageId: "msg-2", receiptHandle: "receipt-2" }), + ]); + + expect(failures).toEqual([ + { itemIdentifier: "msg-1" }, + { itemIdentifier: "msg-2" }, + ]); + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 2); + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-2", 2); + expect(mockSendToDlq).not.toHaveBeenCalled(); + expect(mockDeliverPayload).not.toHaveBeenCalled(); + }); + + it("requeues all records when circuit is open", async () => { + mockAdmit.mockResolvedValue({ + allowed: false, + reason: "circuit_open", + retryAfterMs: 30_000, + effectiveRate: 0, + }); - it("sends all records to DLQ when CLIENT_ID is not set", async () => { - delete process.env.CLIENT_ID; + const failures = await processRecords([ + makeRecord({ messageId: "msg-1", receiptHandle: "receipt-1" }), + makeRecord({ messageId: "msg-2", receiptHandle: "receipt-2" }), + ]); + + expect(failures).toEqual([ + { itemIdentifier: "msg-1" }, + { itemIdentifier: "msg-2" }, + ]); + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 30); + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-2", 30); + expect(mockSendToDlq).not.toHaveBeenCalled(); + expect(mockDeliverPayload).not.toHaveBeenCalled(); + }); + + it("calls recordAdmissionDenied with correlationIds when batch denied", async () => { + const record1 = makeRecord({ messageId: "msg-1" }); + const record2 = makeRecord({ messageId: "msg-2" }); + + mockAdmit.mockResolvedValue({ + allowed: false, + reason: "circuit_open", + retryAfterMs: 30_000, + effectiveRate: 0, + }); - const record1 = makeRecord({ messageId: "msg-1" }); - const record2 = makeRecord({ messageId: "msg-2" }); + const { recordAdmissionDenied } = jest.requireMock( + "services/delivery-observability", + ); - const failures = await processRecords([record1, record2]); 
+ await processRecords([record1, record2]); - expect(failures).toEqual([]); - expect(mockSendToDlq).toHaveBeenCalledWith(record1.body); - expect(mockSendToDlq).toHaveBeenCalledWith(record2.body); - expect(mockSendToDlq).toHaveBeenCalledTimes(2); - expect(mockDeliverPayload).not.toHaveBeenCalled(); - }); + expect(recordAdmissionDenied).toHaveBeenCalledWith( + "client-1", + "target-1", + "circuit_open", + ["test-message-id", "test-message-id"], + ); + }); - it("sends to DLQ when retry window is exhausted", async () => { - mockIsWindowExhausted.mockReturnValue(true); + it("does not call recordResult on gate admission-denied path", async () => { + mockAdmit.mockResolvedValue({ + allowed: false, + reason: "rate_limited", + retryAfterMs: 2000, + effectiveRate: 10, + }); - const failures = await processRecords([makeRecord()]); + await processRecords([makeRecord()]); - expect(failures).toEqual([]); - expect(mockSendToDlq).toHaveBeenCalledWith(makeRecord().body); - expect(mockDeliverPayload).not.toHaveBeenCalled(); - }); + expect(mockRecordResult).not.toHaveBeenCalled(); + }); + }); + + describe("circuit breaker", () => { + it("proceeds to delivery when circuit breaker is disabled", async () => { + const targetNoCb = { + ...DEFAULT_TARGET, + delivery: { circuitBreaker: { enabled: false } }, + }; + mockLoadTargetConfig.mockResolvedValue(targetNoCb); + + const failures = await processRecords([makeRecord()]); + + expect(failures).toEqual([]); + expect(mockAdmit).toHaveBeenCalledWith( + expect.anything(), + "target-1", + 10, + false, + 1, + expect.objectContaining({ burstCapacity: 50 }), + ); + expect(mockDeliverPayload).toHaveBeenCalled(); + }); + + it("defaults cbEnabled to true when delivery exists but circuitBreaker is absent", async () => { + const targetDeliveryNoCb = { + ...DEFAULT_TARGET, + delivery: { mtls: { enabled: true } }, + }; + mockLoadTargetConfig.mockResolvedValue(targetDeliveryNoCb); + + await processRecords([makeRecord()]); + + 
expect(mockAdmit).toHaveBeenCalledWith( + expect.anything(), + "target-1", + 10, + true, + 1, + expect.objectContaining({ burstCapacity: 50 }), + ); + }); + + it("defaults cbEnabled to false when delivery is absent", async () => { + const targetNoDelivery = { ...DEFAULT_TARGET, delivery: undefined }; + mockLoadTargetConfig.mockResolvedValue(targetNoDelivery); + + await processRecords([makeRecord()]); + + expect(mockAdmit).toHaveBeenCalledWith( + expect.anything(), + "target-1", + 10, + false, + 1, + expect.objectContaining({ burstCapacity: 50 }), + ); + }); + + it("computes burst capacity as invocationRateLimit * 5", async () => { + const targetHighRate = { + ...DEFAULT_TARGET, + invocationRateLimit: 100, + }; + mockLoadTargetConfig.mockResolvedValue(targetHighRate); + + await processRecords([makeRecord()]); + + expect(mockAdmit).toHaveBeenCalledWith( + expect.anything(), + "target-1", + 100, + true, + 1, + expect.objectContaining({ burstCapacity: 500 }), + ); + }); + + it("caps burst capacity at TOKEN_BUCKET_BURST_CAPACITY", async () => { + const targetVeryHighRate = { + ...DEFAULT_TARGET, + invocationRateLimit: 1000, + }; + mockLoadTargetConfig.mockResolvedValue(targetVeryHighRate); + + await processRecords([makeRecord()]); + + expect(mockAdmit).toHaveBeenCalledWith( + expect.anything(), + "target-1", + 1000, + true, + 1, + expect.objectContaining({ burstCapacity: 2250 }), + ); + }); + + it("calls recordResult with batch counts on successful delivery when CB enabled", async () => { + const targetCb = { + ...DEFAULT_TARGET, + delivery: { circuitBreaker: { enabled: true } }, + }; + mockLoadTargetConfig.mockResolvedValue(targetCb); + mockAdmit.mockResolvedValue({ + allowed: true, + consumedTokens: 1, + effectiveRate: 10, + }); - it("calls changeVisibility with backoff on 5xx", async () => { - mockDeliverPayload.mockResolvedValue({ - outcome: "transient_failure", - statusCode: 503, - }); + const failures = await processRecords([makeRecord()]); + + 
expect(failures).toEqual([]); + expect(mockRecordResult).toHaveBeenCalledWith( + expect.anything(), + "target-1", + 1, + 0, + expect.any(Object), + ); + }); + + it("calls recordResult with failure count on 5xx when CB enabled", async () => { + const targetCb = { + ...DEFAULT_TARGET, + delivery: { circuitBreaker: { enabled: true } }, + }; + mockLoadTargetConfig.mockResolvedValue(targetCb); + mockAdmit.mockResolvedValue({ + allowed: true, + consumedTokens: 1, + effectiveRate: 10, + }); + mockDeliverPayload.mockResolvedValue({ + outcome: "transient_failure", + statusCode: 503, + }); - const failures = await processRecords([makeRecord()]); + const failures = await processRecords([makeRecord()]); + + expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); + expect(mockRecordResult).toHaveBeenCalledWith( + expect.anything(), + "target-1", + 1, + 1, + expect.any(Object), + ); + expect(mockChangeVisibility).toHaveBeenCalled(); + }); + + it("does not call recordResult when CB is disabled on transient failure", async () => { + const targetNoCb = { + ...DEFAULT_TARGET, + delivery: { circuitBreaker: { enabled: false } }, + }; + mockLoadTargetConfig.mockResolvedValue(targetNoCb); + mockDeliverPayload.mockResolvedValue({ + outcome: "transient_failure", + statusCode: 503, + }); - expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); - expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 5); - }); + await processRecords([makeRecord()]); - it("delegates 429 handling to handleRateLimitedRecord", async () => { - mockDeliverPayload.mockResolvedValue({ - outcome: "rate_limited", - retryAfterHeader: "120", + expect(mockRecordResult).not.toHaveBeenCalled(); + expect(mockChangeVisibility).toHaveBeenCalled(); }); - await processRecords([makeRecord()]); + it("does not call recordResult when CB is disabled on success", async () => { + const targetNoCb = { + ...DEFAULT_TARGET, + delivery: { circuitBreaker: { enabled: false } }, + }; + 
mockLoadTargetConfig.mockResolvedValue(targetNoCb); - expect(mockHandleRateLimitedRecord).toHaveBeenCalledWith( - makeRecord(), - "client-1", - "target-1", - "120", - 1, - ); - }); + await processRecords([makeRecord()]); - it("returns no failure when handleRateLimitedRecord resolves (e.g. DLQ path)", async () => { - mockDeliverPayload.mockResolvedValue({ - outcome: "rate_limited", - retryAfterHeader: "99999", + expect(mockRecordResult).not.toHaveBeenCalled(); }); - mockHandleRateLimitedRecord.mockResolvedValue("dlq"); - const failures = await processRecords([makeRecord()]); - - expect(failures).toEqual([]); - }); - - it("requeues all records when rate limited by endpoint gate", async () => { - mockAdmit.mockResolvedValue({ - allowed: false, - reason: "rate_limited", - retryAfterMs: 2000, - effectiveRate: 10, - }); + it("records CircuitBreakerOpen when recordResult indicates circuit opened", async () => { + const targetCb = { + ...DEFAULT_TARGET, + delivery: { circuitBreaker: { enabled: true } }, + }; + mockLoadTargetConfig.mockResolvedValue(targetCb); + mockDeliverPayload.mockResolvedValue({ + outcome: "transient_failure", + statusCode: 503, + }); + mockRecordResult.mockResolvedValue({ + circuitState: "open", + circuitSwitched: true, + }); - const failures = await processRecords([ - makeRecord({ messageId: "msg-1", receiptHandle: "receipt-1" }), - makeRecord({ messageId: "msg-2", receiptHandle: "receipt-2" }), - ]); + const { recordCircuitBreakerOpen } = jest.requireMock( + "services/delivery-observability", + ); - expect(failures).toEqual([ - { itemIdentifier: "msg-1" }, - { itemIdentifier: "msg-2" }, - ]); - expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 2); - expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-2", 2); - expect(mockSendToDlq).not.toHaveBeenCalled(); - expect(mockDeliverPayload).not.toHaveBeenCalled(); - }); + await processRecords([makeRecord()]); - it("requeues all records when circuit is open", async () => { - 
mockAdmit.mockResolvedValue({ - allowed: false, - reason: "circuit_open", - retryAfterMs: 30_000, - effectiveRate: 0, - }); - - const failures = await processRecords([ - makeRecord({ messageId: "msg-1", receiptHandle: "receipt-1" }), - makeRecord({ messageId: "msg-2", receiptHandle: "receipt-2" }), - ]); - - expect(failures).toEqual([ - { itemIdentifier: "msg-1" }, - { itemIdentifier: "msg-2" }, - ]); - expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 30); - expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-2", 30); - expect(mockSendToDlq).not.toHaveBeenCalled(); - expect(mockDeliverPayload).not.toHaveBeenCalled(); - }); + expect(recordCircuitBreakerOpen).toHaveBeenCalledWith("target-1"); + }); - it("proceeds to delivery when circuit breaker is disabled", async () => { - const targetNoCb = { - ...DEFAULT_TARGET, - delivery: { circuitBreaker: { enabled: false } }, - }; - mockLoadTargetConfig.mockResolvedValue(targetNoCb); - - const failures = await processRecords([makeRecord()]); - - expect(failures).toEqual([]); - expect(mockAdmit).toHaveBeenCalledWith( - expect.anything(), - "target-1", - 10, - false, - 1, - expect.objectContaining({ burstCapacity: 50 }), - ); - expect(mockDeliverPayload).toHaveBeenCalled(); - }); + it("does not record CircuitBreakerOpen when recordResult has no state change", async () => { + const targetCb = { + ...DEFAULT_TARGET, + delivery: { circuitBreaker: { enabled: true } }, + }; + mockLoadTargetConfig.mockResolvedValue(targetCb); + mockDeliverPayload.mockResolvedValue({ + outcome: "transient_failure", + statusCode: 503, + }); + mockRecordResult.mockResolvedValue({ + circuitState: "open", + circuitSwitched: false, + }); - it("defaults cbEnabled to true when delivery exists but circuitBreaker is absent", async () => { - const targetDeliveryNoCb = { - ...DEFAULT_TARGET, - delivery: { mtls: { enabled: true } }, - }; - mockLoadTargetConfig.mockResolvedValue(targetDeliveryNoCb); - - await processRecords([makeRecord()]); - 
- expect(mockAdmit).toHaveBeenCalledWith( - expect.anything(), - "target-1", - 10, - true, - 1, - expect.objectContaining({ burstCapacity: 50 }), - ); - }); + const { recordCircuitBreakerOpen } = jest.requireMock( + "services/delivery-observability", + ); - it("defaults cbEnabled to false when delivery is absent", async () => { - const targetNoDelivery = { ...DEFAULT_TARGET, delivery: undefined }; - mockLoadTargetConfig.mockResolvedValue(targetNoDelivery); + await processRecords([makeRecord()]); - await processRecords([makeRecord()]); + expect(recordCircuitBreakerOpen).not.toHaveBeenCalled(); + }); - expect(mockAdmit).toHaveBeenCalledWith( - expect.anything(), - "target-1", - 10, - false, - 1, - expect.objectContaining({ burstCapacity: 50 }), - ); - }); + it("does not record CircuitBreakerOpen when circuit is closed", async () => { + const targetCb = { + ...DEFAULT_TARGET, + delivery: { circuitBreaker: { enabled: true } }, + }; + mockLoadTargetConfig.mockResolvedValue(targetCb); + mockDeliverPayload.mockResolvedValue({ + outcome: "transient_failure", + statusCode: 503, + }); + mockRecordResult.mockResolvedValue({ + circuitState: "closed", + circuitSwitched: false, + }); - it("computes burst capacity as invocationRateLimit * 5", async () => { - const targetHighRate = { - ...DEFAULT_TARGET, - invocationRateLimit: 100, - }; - mockLoadTargetConfig.mockResolvedValue(targetHighRate); - - await processRecords([makeRecord()]); - - expect(mockAdmit).toHaveBeenCalledWith( - expect.anything(), - "target-1", - 100, - true, - 1, - expect.objectContaining({ burstCapacity: 500 }), - ); - }); + const { recordCircuitBreakerOpen } = jest.requireMock( + "services/delivery-observability", + ); - it("caps burst capacity at TOKEN_BUCKET_BURST_CAPACITY", async () => { - const targetVeryHighRate = { - ...DEFAULT_TARGET, - invocationRateLimit: 1000, - }; - mockLoadTargetConfig.mockResolvedValue(targetVeryHighRate); - - await processRecords([makeRecord()]); - - 
expect(mockAdmit).toHaveBeenCalledWith( - expect.anything(), - "target-1", - 1000, - true, - 1, - expect.objectContaining({ burstCapacity: 2250 }), - ); - }); + await processRecords([makeRecord()]); - it("calls recordResult with batch counts on successful delivery when CB enabled", async () => { - const targetCb = { - ...DEFAULT_TARGET, - delivery: { circuitBreaker: { enabled: true } }, - }; - mockLoadTargetConfig.mockResolvedValue(targetCb); - mockAdmit.mockResolvedValue({ - allowed: true, - consumedTokens: 1, - effectiveRate: 10, + expect(recordCircuitBreakerOpen).not.toHaveBeenCalled(); }); - const failures = await processRecords([makeRecord()]); - - expect(failures).toEqual([]); - expect(mockRecordResult).toHaveBeenCalledWith( - expect.anything(), - "target-1", - 1, - 0, - expect.any(Object), - ); - }); - - it("calls recordResult with failure count on 5xx when CB enabled", async () => { - const targetCb = { - ...DEFAULT_TARGET, - delivery: { circuitBreaker: { enabled: true } }, - }; - mockLoadTargetConfig.mockResolvedValue(targetCb); - mockAdmit.mockResolvedValue({ - allowed: true, - consumedTokens: 1, - effectiveRate: 10, - }); - mockDeliverPayload.mockResolvedValue({ - outcome: "transient_failure", - statusCode: 503, - }); + it("records CircuitBreakerClosed when recordResult indicates circuit closed", async () => { + const targetCb = { + ...DEFAULT_TARGET, + delivery: { circuitBreaker: { enabled: true } }, + }; + mockLoadTargetConfig.mockResolvedValue(targetCb); + mockDeliverPayload.mockResolvedValue({ + outcome: "success", + statusCode: 200, + }); + mockRecordResult.mockResolvedValue({ + circuitState: "closed_recovery", + circuitSwitched: true, + }); - const failures = await processRecords([makeRecord()]); + const { recordCircuitBreakerClosed } = jest.requireMock( + "services/delivery-observability", + ); - expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); - expect(mockRecordResult).toHaveBeenCalledWith( - expect.anything(), - "target-1", - 1, - 1, - 
expect.any(Object), - ); - expect(mockChangeVisibility).toHaveBeenCalled(); - }); + await processRecords([makeRecord()]); - it("does not call recordResult on gate admission-denied path", async () => { - mockAdmit.mockResolvedValue({ - allowed: false, - reason: "rate_limited", - retryAfterMs: 2000, - effectiveRate: 10, + expect(recordCircuitBreakerClosed).toHaveBeenCalledWith("target-1"); }); + }); - await processRecords([makeRecord()]); + describe("retry and visibility", () => { + it("calls changeVisibility with backoff on 5xx", async () => { + mockDeliverPayload.mockResolvedValue({ + outcome: "transient_failure", + statusCode: 503, + }); - expect(mockRecordResult).not.toHaveBeenCalled(); - }); + const failures = await processRecords([makeRecord()]); - it("does not call recordResult when CB is disabled on transient failure", async () => { - const targetNoCb = { - ...DEFAULT_TARGET, - delivery: { circuitBreaker: { enabled: false } }, - }; - mockLoadTargetConfig.mockResolvedValue(targetNoCb); - mockDeliverPayload.mockResolvedValue({ - outcome: "transient_failure", - statusCode: 503, + expect(failures).toEqual([{ itemIdentifier: "msg-1" }]); + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 5); }); - await processRecords([makeRecord()]); + it("changes visibility once for transient failure", async () => { + mockDeliverPayload.mockResolvedValue({ + outcome: "transient_failure", + statusCode: 503, + }); - expect(mockRecordResult).not.toHaveBeenCalled(); - expect(mockChangeVisibility).toHaveBeenCalled(); - }); + await processRecords([makeRecord()]); - it("does not call recordResult when CB is disabled on success", async () => { - const targetNoCb = { - ...DEFAULT_TARGET, - delivery: { circuitBreaker: { enabled: false } }, - }; - mockLoadTargetConfig.mockResolvedValue(targetNoCb); + expect(mockChangeVisibility).toHaveBeenCalledTimes(1); + }); - await processRecords([makeRecord()]); + it("does not override visibility on rate-limited requeue", async () => { + 
mockDeliverPayload.mockResolvedValue({ + outcome: "rate_limited", + retryAfterHeader: "120", + statusCode: 429, + }); + mockHandleRateLimitedRecord.mockImplementation(async () => { + await mockChangeVisibility("receipt-1", 120); + return "retry"; + }); - expect(mockRecordResult).not.toHaveBeenCalled(); - }); + await processRecords([makeRecord()]); - it("records CircuitBreakerOpen when recordResult indicates circuit opened", async () => { - const targetCb = { - ...DEFAULT_TARGET, - delivery: { circuitBreaker: { enabled: true } }, - }; - mockLoadTargetConfig.mockResolvedValue(targetCb); - mockDeliverPayload.mockResolvedValue({ - outcome: "transient_failure", - statusCode: 503, - }); - mockRecordResult.mockResolvedValue({ - circuitState: "open", - circuitSwitched: true, + expect(mockChangeVisibility).toHaveBeenCalledTimes(1); + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 120); }); - const { recordCircuitBreakerOpen } = jest.requireMock( - "services/delivery-observability", - ); - - await processRecords([makeRecord()]); + it("changes visibility once per record for admission-denied batch", async () => { + mockAdmit.mockResolvedValue({ + allowed: false, + reason: "rate_limited", + retryAfterMs: 2000, + effectiveRate: 10, + }); - expect(recordCircuitBreakerOpen).toHaveBeenCalledWith("target-1"); - }); + await processRecords([ + makeRecord({ messageId: "msg-1", receiptHandle: "receipt-1" }), + makeRecord({ messageId: "msg-2", receiptHandle: "receipt-2" }), + ]); - it("does not record CircuitBreakerOpen when recordResult has no state change", async () => { - const targetCb = { - ...DEFAULT_TARGET, - delivery: { circuitBreaker: { enabled: true } }, - }; - mockLoadTargetConfig.mockResolvedValue(targetCb); - mockDeliverPayload.mockResolvedValue({ - outcome: "transient_failure", - statusCode: 503, - }); - mockRecordResult.mockResolvedValue({ - circuitState: "open", - circuitSwitched: false, + expect(mockChangeVisibility).toHaveBeenCalledTimes(2); + 
expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 2); + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-2", 2); }); - const { recordCircuitBreakerOpen } = jest.requireMock( - "services/delivery-observability", - ); + it("caps visibility delay at SQS maximum (12 hours) for admission-denied batch", async () => { + mockAdmit.mockResolvedValue({ + allowed: false, + reason: "rate_limited", + retryAfterMs: 60_000, + effectiveRate: 10, + }); - await processRecords([makeRecord()]); + const record = makeRecord({ + attributes: { + ApproximateReceiveCount: "1000", + SentTimestamp: "0", + SenderId: "sender", + ApproximateFirstReceiveTimestamp: "0", + }, + }); - expect(recordCircuitBreakerOpen).not.toHaveBeenCalled(); - }); + await processRecords([record]); - it("does not record CircuitBreakerOpen when circuit is closed", async () => { - const targetCb = { - ...DEFAULT_TARGET, - delivery: { circuitBreaker: { enabled: true } }, - }; - mockLoadTargetConfig.mockResolvedValue(targetCb); - mockDeliverPayload.mockResolvedValue({ - outcome: "transient_failure", - statusCode: 503, - }); - mockRecordResult.mockResolvedValue({ - circuitState: "closed", - circuitSwitched: false, + expect(mockChangeVisibility).toHaveBeenCalledWith("receipt-1", 43_200); }); - const { recordCircuitBreakerOpen } = jest.requireMock( - "services/delivery-observability", - ); + it("delegates 429 handling to handleRateLimitedRecord", async () => { + mockDeliverPayload.mockResolvedValue({ + outcome: "rate_limited", + retryAfterHeader: "120", + }); - await processRecords([makeRecord()]); + await processRecords([makeRecord()]); - expect(recordCircuitBreakerOpen).not.toHaveBeenCalled(); - }); - - it("records CircuitBreakerClosed when recordResult indicates circuit closed", async () => { - const targetCb = { - ...DEFAULT_TARGET, - delivery: { circuitBreaker: { enabled: true } }, - }; - mockLoadTargetConfig.mockResolvedValue(targetCb); - mockDeliverPayload.mockResolvedValue({ - outcome: 
"success", - statusCode: 200, - }); - mockRecordResult.mockResolvedValue({ - circuitState: "closed_recovery", - circuitSwitched: true, + expect(mockHandleRateLimitedRecord).toHaveBeenCalledWith( + makeRecord(), + "client-1", + "target-1", + "120", + 1, + ); }); - const { recordCircuitBreakerClosed } = jest.requireMock( - "services/delivery-observability", - ); - - await processRecords([makeRecord()]); + it("uses configured maxRetryDurationSeconds when set on target", async () => { + const targetWithRetry = { + ...DEFAULT_TARGET, + delivery: { + ...DEFAULT_TARGET.delivery, + maxRetryDurationSeconds: 3600, + }, + }; + mockLoadTargetConfig.mockResolvedValue(targetWithRetry); + mockIsWindowExhausted.mockReturnValue(false); - expect(recordCircuitBreakerClosed).toHaveBeenCalledWith("target-1"); - }); + const failures = await processRecords([makeRecord()]); - it("records RateLimited on 429 response", async () => { - mockDeliverPayload.mockResolvedValue({ - outcome: "rate_limited", - retryAfterHeader: "60", + expect(failures).toEqual([]); + expect(mockIsWindowExhausted).toHaveBeenCalledWith( + expect.any(Number), + 3_600_000, + ); }); - const { recordDeliveryRateLimited } = jest.requireMock( - "services/delivery-observability", - ); + it("uses default maxRetryDurationMs when target has no maxRetryDurationSeconds", async () => { + const failures = await processRecords([makeRecord()]); - await processRecords([makeRecord()]); - - expect(recordDeliveryRateLimited).toHaveBeenCalledWith( - "client-1", - "target-1", - "test-message-id", - ); + expect(failures).toEqual([]); + expect(mockIsWindowExhausted).toHaveBeenCalledWith( + expect.any(Number), + 7_200_000, + ); + }); }); - it("uses configured maxRetryDurationSeconds when set on target", async () => { - const targetWithRetry = { - ...DEFAULT_TARGET, - delivery: { ...DEFAULT_TARGET.delivery, maxRetryDurationSeconds: 3600 }, - }; - mockLoadTargetConfig.mockResolvedValue(targetWithRetry); - 
mockIsWindowExhausted.mockReturnValue(false); - - const failures = await processRecords([makeRecord()]); - - expect(failures).toEqual([]); - expect(mockIsWindowExhausted).toHaveBeenCalledWith( - expect.any(Number), - 3_600_000, - ); - }); + describe("error handling", () => { + it("sends all records to DLQ when CLIENT_ID is not set", async () => { + delete process.env.CLIENT_ID; - it("uses default maxRetryDurationMs when target has no maxRetryDurationSeconds", async () => { - const failures = await processRecords([makeRecord()]); + const record1 = makeRecord({ messageId: "msg-1" }); + const record2 = makeRecord({ messageId: "msg-2" }); - expect(failures).toEqual([]); - expect(mockIsWindowExhausted).toHaveBeenCalledWith( - expect.any(Number), - 7_200_000, - ); - }); + const failures = await processRecords([record1, record2]); - it("groups records by target and processes each batch separately", async () => { - const record1 = makeRecord({ messageId: "msg-1" }); - const record2 = makeRecord({ - messageId: "msg-2", - body: JSON.stringify({ - payload: { - data: [ - { - type: "MessageStatus", - attributes: { messageStatus: "delivered" }, - }, - ], - }, - subscriptionId: "sub-2", - targetId: "target-2", - }), + expect(failures).toEqual([]); + expect(mockSendToDlq).toHaveBeenCalledWith(record1.body); + expect(mockSendToDlq).toHaveBeenCalledWith(record2.body); + expect(mockSendToDlq).toHaveBeenCalledTimes(2); + expect(mockDeliverPayload).not.toHaveBeenCalled(); }); - const failures = await processRecords([record1, record2]); + it("an unexpected delivery error does not prevent other records in the batch", async () => { + const record1 = makeRecord({ messageId: "msg-1" }); + const record2 = makeRecord({ messageId: "msg-2" }); - expect(failures).toEqual([]); - expect(mockAdmit).toHaveBeenCalledTimes(2); - expect(mockLoadTargetConfig).toHaveBeenCalledWith("client-1", "target-1"); - expect(mockLoadTargetConfig).toHaveBeenCalledWith("client-1", "target-2"); - }); + 
mockDeliverPayload + .mockRejectedValueOnce(new Error("Connection reset")) + .mockResolvedValueOnce({ outcome: "success" }); - it("calls recordAdmissionDenied with correlationIds when batch denied", async () => { - const record1 = makeRecord({ messageId: "msg-1" }); - const record2 = makeRecord({ messageId: "msg-2" }); + const failures = await processRecords([record1, record2]); - mockAdmit.mockResolvedValue({ - allowed: false, - reason: "circuit_open", - retryAfterMs: 30_000, - effectiveRate: 0, + expect(failures).toEqual([]); + expect(mockSendToDlq).toHaveBeenCalledWith(record1.body); }); - const { recordAdmissionDenied } = jest.requireMock( - "services/delivery-observability", - ); + it("sends unhandled errors to DLQ", async () => { + mockDeliverPayload.mockRejectedValue(new Error("Infrastructure error")); - await processRecords([record1, record2]); + const failures = await processRecords([makeRecord()]); - expect(recordAdmissionDenied).toHaveBeenCalledWith( - "client-1", - "target-1", - "circuit_open", - ["test-message-id", "test-message-id"], - ); - }); - - it("logs deliveredCount and dlqCount in batch complete", async () => { - const record1 = makeRecord({ messageId: "msg-1" }); - const record2 = makeRecord({ messageId: "msg-2" }); - - mockDeliverPayload - .mockResolvedValueOnce({ outcome: "success" }) - .mockResolvedValueOnce({ outcome: "permanent_failure" }); - - const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); - - await processRecords([record1, record2]); - - expect(logger.info).toHaveBeenCalledWith( - "Batch complete", - expect.objectContaining({ - batchSize: 2, - deliveredCount: 1, - dlqCount: 1, - retryCount: 0, - }), - ); - }); + expect(failures).toEqual([]); + expect(mockSendToDlq).toHaveBeenCalledWith(makeRecord().body); + expect(mockChangeVisibility).not.toHaveBeenCalled(); + }); - it("includes correlationId in error log on unexpected delivery failure", async () => { - mockDeliverPayload.mockRejectedValue(new 
Error("Connection reset")); + it("sends unparseable records to DLQ and logs error", async () => { + const badRecord = makeRecord({ + messageId: "bad-msg", + body: "not-valid-json{{{", + }); + const goodRecord = makeRecord({ messageId: "good-msg" }); + + const { logger } = jest.requireMock( + "@nhs-notify-client-callbacks/logger", + ); + + const result = await processRecords([badRecord, goodRecord]); + + expect(mockSendToDlq).toHaveBeenCalledWith(badRecord.body); + expect(logger.error).toHaveBeenCalledWith( + "Unparseable message body \u2014 sending to DLQ", + expect.objectContaining({ messageId: "bad-msg" }), + ); + expect(result).toHaveLength(0); + expect(logger.info).toHaveBeenCalledWith( + "Batch complete", + expect.objectContaining({ dlqCount: 1, deliveredCount: 1 }), + ); + }); + + it("returns record for retry when DLQ send fails after delivery error", async () => { + mockDeliverPayload.mockRejectedValue(new Error("Connection reset")); + mockSendToDlq.mockRejectedValue(new Error("DLQ unavailable")); + + const { logger } = jest.requireMock( + "@nhs-notify-client-callbacks/logger", + ); + + const result = await processRecords([makeRecord()]); + + expect(logger.error).toHaveBeenCalledWith( + "DLQ send also failed \u2014 returning for retry", + expect.objectContaining({ messageId: "msg-1" }), + ); + expect(result).toHaveLength(1); + expect(result[0].itemIdentifier).toBe("msg-1"); + }); + + it("swallows reportCircuitBreaker errors without affecting delivery", async () => { + mockRecordResult.mockRejectedValue(new Error("Redis timeout")); + + const { logger } = jest.requireMock( + "@nhs-notify-client-callbacks/logger", + ); + + const result = await processRecords([makeRecord()]); + + expect(logger.error).toHaveBeenCalledWith( + "Failed to report circuit breaker result", + expect.objectContaining({ targetId: "target-1" }), + ); + expect(result).toHaveLength(0); + expect(logger.info).toHaveBeenCalledWith( + "Batch complete", + expect.objectContaining({ deliveredCount: 
1 }), + ); + }); + + it("sends all batch records to DLQ when processTargetBatch throws", async () => { + mockLoadTargetConfig.mockRejectedValue( + new Error("Config service unavailable"), + ); + + const { logger } = jest.requireMock( + "@nhs-notify-client-callbacks/logger", + ); + + const records = [ + makeRecord({ messageId: "msg-1" }), + makeRecord({ messageId: "msg-2" }), + ]; + const result = await processRecords(records); + + expect(logger.error).toHaveBeenCalledWith( + "Target batch failed \u2014 sending all records to DLQ", + expect.objectContaining({ targetId: "target-1" }), + ); + expect(mockSendToDlq).toHaveBeenCalledTimes(2); + expect(result).toHaveLength(0); + expect(logger.info).toHaveBeenCalledWith( + "Batch complete", + expect.objectContaining({ dlqCount: 2 }), + ); + }); + }); + + describe("observability", () => { + it("records RateLimited on 429 response", async () => { + mockDeliverPayload.mockResolvedValue({ + outcome: "rate_limited", + retryAfterHeader: "60", + }); - const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); + const { recordDeliveryRateLimited } = jest.requireMock( + "services/delivery-observability", + ); - await processRecords([makeRecord()]); + await processRecords([makeRecord()]); - expect(logger.error).toHaveBeenCalledWith( - "Failed to process record", - expect.objectContaining({ - messageId: "msg-1", - correlationId: "test-message-id", - }), - ); - }); - - it("sends unparseable records to DLQ and logs error", async () => { - const badRecord = makeRecord({ - messageId: "bad-msg", - body: "not-valid-json{{{", + expect(recordDeliveryRateLimited).toHaveBeenCalledWith( + "client-1", + "target-1", + "test-message-id", + ); }); - const goodRecord = makeRecord({ messageId: "good-msg" }); - - const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); - - const result = await processRecords([badRecord, goodRecord]); - - expect(mockSendToDlq).toHaveBeenCalledWith(badRecord.body); - 
expect(logger.error).toHaveBeenCalledWith( - "Unparseable message body \u2014 sending to DLQ", - expect.objectContaining({ messageId: "bad-msg" }), - ); - expect(result).toHaveLength(0); - expect(logger.info).toHaveBeenCalledWith( - "Batch complete", - expect.objectContaining({ dlqCount: 1, deliveredCount: 1 }), - ); - }); - it("returns record for retry when DLQ send fails after delivery error", async () => { - mockDeliverPayload.mockRejectedValue(new Error("Connection reset")); - mockSendToDlq.mockRejectedValue(new Error("DLQ unavailable")); + it("includes correlationId in error log on unexpected delivery failure", async () => { + mockDeliverPayload.mockRejectedValue(new Error("Connection reset")); - const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); + const { logger } = jest.requireMock( + "@nhs-notify-client-callbacks/logger", + ); - const result = await processRecords([makeRecord()]); + await processRecords([makeRecord()]); - expect(logger.error).toHaveBeenCalledWith( - "DLQ send also failed \u2014 returning for retry", - expect.objectContaining({ messageId: "msg-1" }), - ); - expect(result).toHaveLength(1); - expect(result[0].itemIdentifier).toBe("msg-1"); - }); + expect(logger.error).toHaveBeenCalledWith( + "Failed to process record", + expect.objectContaining({ + messageId: "msg-1", + correlationId: "test-message-id", + }), + ); + }); - it("swallows reportCircuitBreaker errors without affecting delivery", async () => { - mockRecordResult.mockRejectedValue(new Error("Redis timeout")); + it("logs deliveredCount and dlqCount in batch complete", async () => { + const record1 = makeRecord({ messageId: "msg-1" }); + const record2 = makeRecord({ messageId: "msg-2" }); - const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); + mockDeliverPayload + .mockResolvedValueOnce({ outcome: "success" }) + .mockResolvedValueOnce({ outcome: "permanent_failure" }); - const result = await processRecords([makeRecord()]); + const { 
logger } = jest.requireMock( + "@nhs-notify-client-callbacks/logger", + ); - expect(logger.error).toHaveBeenCalledWith( - "Failed to report circuit breaker result", - expect.objectContaining({ targetId: "target-1" }), - ); - expect(result).toHaveLength(0); - expect(logger.info).toHaveBeenCalledWith( - "Batch complete", - expect.objectContaining({ deliveredCount: 1 }), - ); - }); + await processRecords([record1, record2]); - it("sends all batch records to DLQ when processTargetBatch throws", async () => { - mockLoadTargetConfig.mockRejectedValue( - new Error("Config service unavailable"), - ); - - const { logger } = jest.requireMock("@nhs-notify-client-callbacks/logger"); - - const records = [ - makeRecord({ messageId: "msg-1" }), - makeRecord({ messageId: "msg-2" }), - ]; - const result = await processRecords(records); - - expect(logger.error).toHaveBeenCalledWith( - "Target batch failed \u2014 sending all records to DLQ", - expect.objectContaining({ targetId: "target-1" }), - ); - expect(mockSendToDlq).toHaveBeenCalledTimes(2); - expect(result).toHaveLength(0); - expect(logger.info).toHaveBeenCalledWith( - "Batch complete", - expect.objectContaining({ dlqCount: 2 }), - ); + expect(logger.info).toHaveBeenCalledWith( + "Batch complete", + expect.objectContaining({ + batchSize: 2, + deliveredCount: 1, + dlqCount: 1, + retryCount: 0, + }), + ); + }); }); }); From 200875cfce013df49285c45b29a3be46015880e5 Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Thu, 7 May 2026 15:50:16 +0100 Subject: [PATCH 60/65] Fix tls agent test assertions --- .../https-client-lambda/src/__tests__/tls-agent-factory.test.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts b/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts index 4790f4eb..09a54802 100644 --- a/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts +++ b/lambdas/https-client-lambda/src/__tests__/tls-agent-factory.test.ts @@ 
-106,12 +106,14 @@ describe("tls-agent-factory", () => { expect(agent).toBeDefined(); expect(agent.options.keepAlive).toBe(false); + expect(mockS3Send).toHaveBeenCalled(); }); it("builds agent without key and cert when mtls is disabled", async () => { const agent = await buildAgent(createTarget()); expect(agent).toBeDefined(); + expect(agent.options.keepAlive).toBe(false); expect(mockS3Send).not.toHaveBeenCalled(); }); From cc35c24517048a3db92cf5f0b08dd79bbe5f8800 Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Thu, 7 May 2026 15:51:00 +0100 Subject: [PATCH 61/65] Update subscription tool README --- .../client-subscriptions-management/README.md | 156 ++++++++++-------- 1 file changed, 88 insertions(+), 68 deletions(-) diff --git a/tools/client-subscriptions-management/README.md b/tools/client-subscriptions-management/README.md index 9bef106b..b735c301 100644 --- a/tools/client-subscriptions-management/README.md +++ b/tools/client-subscriptions-management/README.md @@ -1,109 +1,129 @@ # client-subscriptions-management -TypeScript CLI utility for managing NHS Notify client subscription configuration in S3. +TypeScript CLI for managing NHS Notify client callback configuration in S3. 
## Usage -From the repository root run: - ```bash pnpm --filter client-subscriptions-management run -- [options] ``` -## Example +### Common options -Deploy a message status subscription to the `dev` environment using a named AWS profile: +All commands accept: -```bash -pnpm --filter client-subscriptions-management run deploy -- message \ - --environment dev \ - --profile my-profile \ - --client-id my-client \ - --message-statuses DELIVERED FAILED \ - --api-endpoint https://webhook.example.invalid/callbacks \ - --api-key 1234.4321 \ - --rate-limit 20 \ - --dry-run false -``` +| Option | Default | Description | +| --------------- | ------------- | --------------------------------------------- | +| `--environment` | — | Environment name (used to derive bucket name) | +| `--bucket-name` | derived | Override the S3 bucket name | +| `--region` | `eu-west-2` | AWS region | +| `--profile` | `AWS_PROFILE` | AWS credentials profile | -## Commands +Write commands (`clients-put`, `subscriptions-add`, `subscriptions-del`, `subscriptions-set-states`, `targets-add`, `targets-del`, `applications-map-add`) also accept `--dry-run` (default `false`). 
-### Deploy a Subscription +## Commands -#### Message status +### Clients ```bash -pnpm --filter client-subscriptions-management run deploy -- message \ - --environment dev \ +# List all client IDs +pnpm --filter client-subscriptions-management run clients-list -- --environment dev + +# Get a client's full configuration +pnpm --filter client-subscriptions-management run clients-get -- --environment dev --client-id client-123 + +# Write a full client configuration from JSON +pnpm --filter client-subscriptions-management run clients-put -- --environment dev \ --client-id client-123 \ - --message-statuses DELIVERED FAILED \ - --api-endpoint https://webhook.example.invalid \ - --api-key-header-name x-api-key \ - --api-key 1234.4321 \ - --dry-run false \ - --rate-limit 20 + --file config.json ``` -#### Channel status +`clients-put` accepts `--json ` or `--file ` (mutually exclusive, one required). + +### Targets ```bash -pnpm --filter client-subscriptions-management run deploy -- channel \ - --environment dev \ +# List a client's callback targets +pnpm --filter client-subscriptions-management run targets-list -- --environment dev --client-id client-123 + +# Add a callback target +pnpm --filter client-subscriptions-management run targets-add -- --environment dev \ --client-id client-123 \ - --channel-type EMAIL \ - --channel-statuses DELIVERED FAILED \ - --supplier-statuses READ REJECTED \ --api-endpoint https://webhook.example.invalid \ - --api-key-header-name x-api-key \ - --api-key 1234.4321 \ - --dry-run false \ + --api-key secret-key \ --rate-limit 20 + +# Delete a callback target +pnpm --filter client-subscriptions-management run targets-del -- --environment dev \ + --client-id client-123 \ + --target-id target-abc ``` -Optional for both: `--client-name "Test Client"` (defaults to client-id if not provided), `--project ` (defaults to `nhs`), `--region ` (defaults to `eu-west-2`), `--profile `, `--bucket-name ` (override derived bucket name) +`targets-add` options: 
-**Note (channel)**: At least one of `--channel-statuses` or `--supplier-statuses` must be provided. +| Option | Required | Default | +| ----------------------- | -------- | ----------- | +| `--api-endpoint` | yes | — | +| `--api-key` | yes | — | +| `--api-key-header-name` | no | `x-api-key` | +| `--rate-limit` | yes | — | -### Get Client Subscriptions By Client ID +### Subscriptions ```bash -pnpm --filter client-subscriptions-management run get-by-client-id -- \ - --environment dev \ - --client-id client-123 -``` +# List a client's subscriptions +pnpm --filter client-subscriptions-management run subscriptions-list -- --environment dev --client-id client-123 -### Put Message Status Subscription (S3 upload only) +# Add a message status subscription +pnpm --filter client-subscriptions-management run subscriptions-add -- --environment dev \ + --client-id client-123 \ + --subscription-type MessageStatus \ + --target-id target-abc \ + --message-statuses DELIVERED FAILED -```bash -pnpm --filter client-subscriptions-management run put-message-status -- \ - --environment dev \ +# Add a channel status subscription +pnpm --filter client-subscriptions-management run subscriptions-add -- --environment dev \ --client-id client-123 \ - --message-statuses DELIVERED FAILED \ - --api-endpoint https://webhook.example.invalid \ - --api-key-header-name x-api-key \ - --api-key 1234.4321 \ - --dry-run false \ - --rate-limit 20 + --subscription-type ChannelStatus \ + --target-id target-abc \ + --channel-type EMAIL \ + --channel-statuses DELIVERED FAILED \ + --supplier-statuses read rejected + +# Update statuses on an existing subscription +pnpm --filter client-subscriptions-management run subscriptions-set-states -- --environment dev \ + --client-id client-123 \ + --subscription-id sub-456 \ + --message-statuses DELIVERED FAILED SENDING + +# Delete a subscription +pnpm --filter client-subscriptions-management run subscriptions-del -- --environment dev \ + --client-id client-123 \ + 
--subscription-id sub-456 ``` -Optional: `--client-name "Test Client"` (defaults to client-id if not provided), `--profile `, `--bucket-name ` +`subscriptions-add` options: + +| Option | Required | Notes | +| --------------------- | ----------- | --------------------------------------------------------------------------------- | +| `--subscription-type` | yes | `MessageStatus` or `ChannelStatus` | +| `--target-id` | yes | One or more target IDs | +| `--message-statuses` | conditional | Required for `MessageStatus` | +| `--channel-type` | conditional | Required for `ChannelStatus` (`NHSAPP`, `EMAIL`, `SMS`, `LETTER`) | +| `--channel-statuses` | conditional | At least one of `--channel-statuses` or `--supplier-statuses` for `ChannelStatus` | +| `--supplier-statuses` | conditional | See above | +| `--subscription-id` | no | Auto-generated UUID if omitted | -### Put Channel Status Subscription (S3 upload only) +### Applications Map ```bash -pnpm --filter client-subscriptions-management run put-channel-status -- \ - --environment dev \ +# Get the application ID for a client +pnpm --filter client-subscriptions-management run applications-map-get -- --environment dev --client-id client-123 + +# Add/update a client-to-application-ID mapping +pnpm --filter client-subscriptions-management run applications-map-add -- --environment dev \ --client-id client-123 \ - --channel-type EMAIL \ - --channel-statuses DELIVERED FAILED \ - --supplier-statuses READ REJECTED \ - --api-endpoint https://webhook.example.invalid \ - --api-key-header-name x-api-key \ - --api-key 1234.4321 \ - --dry-run false \ - --rate-limit 20 + --application-id app-789 ``` -Optional: `--client-name "Test Client"` (defaults to client-id if not provided), `--profile `, `--bucket-name ` - -**Note**: At least one of `--channel-statuses` or `--supplier-statuses` must be provided. +Both accept optional `--applications-map-bucket` and `--applications-map-key` to override the default S3 location. 
From b8ccd6f78858aea2d8244fcfe5631500d1520042 Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Thu, 7 May 2026 15:55:43 +0100 Subject: [PATCH 62/65] Ensure all sub tool CLI options output when doing dry run --- .../__tests__/entrypoint/cli/applications-map-add.test.ts | 6 ++---- .../src/__tests__/entrypoint/cli/clients-put.test.ts | 7 ++++++- .../src/entrypoint/cli/applications-map-add.ts | 5 +++-- .../src/entrypoint/cli/clients-put.ts | 6 +++--- .../src/entrypoint/cli/subscriptions-add.ts | 3 +++ .../src/entrypoint/cli/subscriptions-del.ts | 3 +++ .../src/entrypoint/cli/subscriptions-set-states.ts | 3 +++ .../src/entrypoint/cli/targets-add.ts | 3 +++ .../src/entrypoint/cli/targets-del.ts | 3 +++ 9 files changed, 29 insertions(+), 10 deletions(-) diff --git a/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/applications-map-add.test.ts b/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/applications-map-add.test.ts index c44eef76..27a6a28f 100644 --- a/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/applications-map-add.test.ts +++ b/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/applications-map-add.test.ts @@ -88,7 +88,7 @@ describe("applications-map-add CLI", () => { await cli.main(baseArgs); expect(console.log).not.toHaveBeenCalledWith( - "Dry run — no changes written to S3.", + "Dry run — no changes written.", ); }); @@ -96,9 +96,7 @@ describe("applications-map-add CLI", () => { await cli.main([...baseArgs, "--dry-run"]); expect(mockAddApplication).toHaveBeenCalledWith("client-1", "app-1", true); - expect(console.log).toHaveBeenCalledWith( - "Dry run — no changes written to S3.", - ); + expect(console.log).toHaveBeenCalledWith("Dry run — no changes written."); }); it("handles errors in wrapped CLI", async () => { diff --git a/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/clients-put.test.ts 
b/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/clients-put.test.ts index 39b800ab..4e0eb8a8 100644 --- a/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/clients-put.test.ts +++ b/tools/client-subscriptions-management/src/__tests__/entrypoint/cli/clients-put.test.ts @@ -243,10 +243,15 @@ describe("clients-put CLI", () => { validConfig, true, ); - expect(console.log).toHaveBeenCalledWith("Dry run: config is valid"); + expect(console.log).toHaveBeenCalledWith( + "Dry run \u2014 no changes written.", + ); expect(console.log).toHaveBeenCalledWith( JSON.stringify(validConfig, null, 2), ); + expect(console.log).not.toHaveBeenCalledWith( + `Config written for client: client-1`, + ); }); it("handles errors in wrapped CLI", async () => { diff --git a/tools/client-subscriptions-management/src/entrypoint/cli/applications-map-add.ts b/tools/client-subscriptions-management/src/entrypoint/cli/applications-map-add.ts index 365d6e51..f1bfcf8b 100644 --- a/tools/client-subscriptions-management/src/entrypoint/cli/applications-map-add.ts +++ b/tools/client-subscriptions-management/src/entrypoint/cli/applications-map-add.ts @@ -41,9 +41,10 @@ export const handler: CliCommand["handler"] = async ( argv["application-id"], argv["dry-run"], ); - console.log(`Applications map updated for client '${argv["client-id"]}'.`); if (argv["dry-run"]) { - console.log("Dry run — no changes written to S3."); + console.log("Dry run \u2014 no changes written."); + } else { + console.log(`Applications map updated for client '${argv["client-id"]}'.`); } console.log(formatApplicationsMap(result)); }; diff --git a/tools/client-subscriptions-management/src/entrypoint/cli/clients-put.ts b/tools/client-subscriptions-management/src/entrypoint/cli/clients-put.ts index ce3d1cca..ef7cf65f 100644 --- a/tools/client-subscriptions-management/src/entrypoint/cli/clients-put.ts +++ b/tools/client-subscriptions-management/src/entrypoint/cli/clients-put.ts @@ -93,11 +93,11 @@ export 
const handler: CliCommand["handler"] = async (argv) => { argv["dry-run"], ); - console.log(`Config written for client: ${argv["client-id"]}`); - if (argv["dry-run"]) { - console.log("Dry run: config is valid"); + console.log("Dry run \u2014 no changes written."); console.log(JSON.stringify(result, null, 2)); + } else { + console.log(`Config written for client: ${argv["client-id"]}`); } }; diff --git a/tools/client-subscriptions-management/src/entrypoint/cli/subscriptions-add.ts b/tools/client-subscriptions-management/src/entrypoint/cli/subscriptions-add.ts index fc34cd2b..b367fa2f 100644 --- a/tools/client-subscriptions-management/src/entrypoint/cli/subscriptions-add.ts +++ b/tools/client-subscriptions-management/src/entrypoint/cli/subscriptions-add.ts @@ -146,6 +146,9 @@ export const handler: CliCommand["handler"] = async ( argv["dry-run"], ); + if (argv["dry-run"]) { + console.log("Dry run \u2014 no changes written."); + } console.log(formatClientConfig(result)); }; diff --git a/tools/client-subscriptions-management/src/entrypoint/cli/subscriptions-del.ts b/tools/client-subscriptions-management/src/entrypoint/cli/subscriptions-del.ts index 74c07da0..a5f675f8 100644 --- a/tools/client-subscriptions-management/src/entrypoint/cli/subscriptions-del.ts +++ b/tools/client-subscriptions-management/src/entrypoint/cli/subscriptions-del.ts @@ -39,6 +39,9 @@ export const handler: CliCommand["handler"] = async ( argv["dry-run"], ); + if (argv["dry-run"]) { + console.log("Dry run \u2014 no changes written."); + } console.log(formatClientConfig(result)); }; diff --git a/tools/client-subscriptions-management/src/entrypoint/cli/subscriptions-set-states.ts b/tools/client-subscriptions-management/src/entrypoint/cli/subscriptions-set-states.ts index ee17a979..1d6f8228 100644 --- a/tools/client-subscriptions-management/src/entrypoint/cli/subscriptions-set-states.ts +++ b/tools/client-subscriptions-management/src/entrypoint/cli/subscriptions-set-states.ts @@ -89,6 +89,9 @@ export 
const handler: CliCommand["handler"] = argv["dry-run"], ); + if (argv["dry-run"]) { + console.log("Dry run \u2014 no changes written."); + } console.log(formatClientConfig(result)); }; diff --git a/tools/client-subscriptions-management/src/entrypoint/cli/targets-add.ts b/tools/client-subscriptions-management/src/entrypoint/cli/targets-add.ts index 524d51d7..35724d1a 100644 --- a/tools/client-subscriptions-management/src/entrypoint/cli/targets-add.ts +++ b/tools/client-subscriptions-management/src/entrypoint/cli/targets-add.ts @@ -65,6 +65,9 @@ export const handler: CliCommand["handler"] = async (argv) => { target, argv["dry-run"], ); + if (argv["dry-run"]) { + console.log("Dry run \u2014 no changes written."); + } console.log(`Target added with ID: ${target.targetId}`); console.log(formatClientConfig(result)); }; diff --git a/tools/client-subscriptions-management/src/entrypoint/cli/targets-del.ts b/tools/client-subscriptions-management/src/entrypoint/cli/targets-del.ts index 7f2c3e19..d7030dec 100644 --- a/tools/client-subscriptions-management/src/entrypoint/cli/targets-del.ts +++ b/tools/client-subscriptions-management/src/entrypoint/cli/targets-del.ts @@ -34,6 +34,9 @@ export const handler: CliCommand["handler"] = async (argv) => { argv["dry-run"], ); + if (argv["dry-run"]) { + console.log("Dry run \u2014 no changes written."); + } console.log(formatClientConfig(result)); }; From 1e9510aed3f1852ad5eb41bed9b071f6f80d3596 Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Thu, 7 May 2026 16:32:37 +0100 Subject: [PATCH 63/65] Remove nosiy redis client logging --- lambdas/https-client-lambda/src/services/redis-client.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/lambdas/https-client-lambda/src/services/redis-client.ts b/lambdas/https-client-lambda/src/services/redis-client.ts index 7d8785c8..5dbc295c 100644 --- a/lambdas/https-client-lambda/src/services/redis-client.ts +++ b/lambdas/https-client-lambda/src/services/redis-client.ts @@ -63,7 +63,6 @@ export async 
function getRedisClient(): Promise { tokenExpiry > Date.now() + TOKEN_REFRESH_BUFFER_SECONDS * 1000; if (redisClient?.isOpen && isTokenValid) { - logger.info("Reusing existing Redis client"); return redisClient; } From 95bb5eac6e78b8dd3507061e6cff18960f91fc9a Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Thu, 7 May 2026 17:20:21 +0100 Subject: [PATCH 64/65] Flakey circuit breaker test fix --- tests/integration/delivery-resilience.test.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/integration/delivery-resilience.test.ts b/tests/integration/delivery-resilience.test.ts index 64d32018..6420d0b8 100644 --- a/tests/integration/delivery-resilience.test.ts +++ b/tests/integration/delivery-resilience.test.ts @@ -221,11 +221,12 @@ describe("Delivery Resilience", () => { ); expect(warmupCallback.path).toBe(cbTargetPath); + const forceUntil = Date.now() + 60_000; const cbEvents = Array.from({ length: CB_BURST_SIZE }, () => createMessageStatusPublishEvent({ data: { clientId: cbConfig.clientId, - messageId: `force-500-cb-${crypto.randomUUID()}`, + messageId: `force-500-until-${forceUntil}-cb-${crypto.randomUUID()}`, }, }), ); From 95bdb23fa88077b799a532370db6b3363c7e6b0b Mon Sep 17 00:00:00 2001 From: Mike Wild Date: Thu, 7 May 2026 17:21:21 +0100 Subject: [PATCH 65/65] Log if IT purge fails due do 1 being in progress --- tests/integration/helpers/sqs.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/integration/helpers/sqs.ts b/tests/integration/helpers/sqs.ts index b0d3f4ff..1420b19d 100644 --- a/tests/integration/helpers/sqs.ts +++ b/tests/integration/helpers/sqs.ts @@ -153,9 +153,11 @@ export async function purgeQueue( }), ); } catch (error) { - if (!(error instanceof Error) || error.name !== "PurgeQueueInProgress") { - throw error; + if (error instanceof Error && error.name === "PurgeQueueInProgress") { + logger.warn(`Purge already in progress, skipping (${queueUrl})`); + return; } + throw error; } }