From 1db4b227f9612c51dbb67912a10c1e571aa7262c Mon Sep 17 00:00:00 2001 From: JamesPasta Date: Thu, 7 May 2026 13:01:51 -0700 Subject: [PATCH 01/13] feature/AB#32049-Promethious --- .../Unity.GrantManager/docker-compose.yml | 21 +++++ .../scripts/prometheus/alert-rules.yml | 31 ++++++ .../scripts/prometheus/alertmanager.yml | 15 +++ .../scripts/prometheus/prometheus.yml | 17 ++++ .../Controllers/Monitoring/AlertPayload.cs | 38 ++++++++ .../Monitoring/AlertWebhookController.cs | 94 +++++++++++++++++++ .../GrantManagerWebModule.cs | 3 + .../Middleware/ErrorCountingLoggerProvider.cs | 34 +++++++ .../Middleware/ExceptionCounterMiddleware.cs | 74 +++++++++++++++ .../src/Unity.GrantManager.Web/Program.cs | 17 +++- .../Unity.GrantManager.Web.csproj | 6 ++ 11 files changed, 349 insertions(+), 1 deletion(-) create mode 100644 applications/Unity.GrantManager/scripts/prometheus/alert-rules.yml create mode 100644 applications/Unity.GrantManager/scripts/prometheus/alertmanager.yml create mode 100644 applications/Unity.GrantManager/scripts/prometheus/prometheus.yml create mode 100644 applications/Unity.GrantManager/src/Unity.GrantManager.Web/Controllers/Monitoring/AlertPayload.cs create mode 100644 applications/Unity.GrantManager/src/Unity.GrantManager.Web/Controllers/Monitoring/AlertWebhookController.cs create mode 100644 applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ErrorCountingLoggerProvider.cs create mode 100644 applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionCounterMiddleware.cs diff --git a/applications/Unity.GrantManager/docker-compose.yml b/applications/Unity.GrantManager/docker-compose.yml index a3015c37e3..720f9574d6 100644 --- a/applications/Unity.GrantManager/docker-compose.yml +++ b/applications/Unity.GrantManager/docker-compose.yml @@ -146,6 +146,27 @@ services: networks: - common-network + prometheus: + image: prom/prometheus:latest + ports: + - "9090:9090" + volumes: + - 
./scripts/prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - ./scripts/prometheus/alert-rules.yml:/etc/prometheus/alert-rules.yml:ro + depends_on: + - unity-grantmanager-web + networks: + - common-network + + alertmanager: + image: prom/alertmanager:latest + ports: + - "9093:9093" + volumes: + - ./scripts/prometheus/alertmanager.yml:/etc/alertmanager/alertmanager.yml:ro + networks: + - common-network + volumes: postgres_data: redis_volume_data: diff --git a/applications/Unity.GrantManager/scripts/prometheus/alert-rules.yml b/applications/Unity.GrantManager/scripts/prometheus/alert-rules.yml new file mode 100644 index 0000000000..b53ad823d8 --- /dev/null +++ b/applications/Unity.GrantManager/scripts/prometheus/alert-rules.yml @@ -0,0 +1,31 @@ +groups: + - name: unity-grantmanager-exceptions + rules: + # Fire if any exception type exceeds 5 occurrences in a 5-minute window + - alert: HighExceptionRate + expr: | + increase(application_exceptions_total[5m]) > 5 + for: 1m + labels: + severity: critical + annotations: + summary: "High exception rate in Unity GrantManager" + description: > + Exception type {{ $labels.type }} has fired {{ $value | humanize }} times + in the last 5 minutes (namespace: {{ $labels.kubernetes_namespace_name }}). + + # Fire if any new exception type appears (catches regressions) + - alert: NewExceptionType + expr: | + increase(application_exceptions_total[10m]) > 0 + unless ( + increase(application_exceptions_total[10m] offset 10m) > 0 + ) + for: 0m + labels: + severity: warning + annotations: + summary: "New exception type detected in Unity GrantManager" + description: > + A new exception type {{ $labels.type }} appeared for the first time + in the last 10 minutes. 
diff --git a/applications/Unity.GrantManager/scripts/prometheus/alertmanager.yml b/applications/Unity.GrantManager/scripts/prometheus/alertmanager.yml new file mode 100644 index 0000000000..14af60fdd9 --- /dev/null +++ b/applications/Unity.GrantManager/scripts/prometheus/alertmanager.yml @@ -0,0 +1,15 @@ +global: + resolve_timeout: 5m + +route: + group_by: ["alertname", "type"] + group_wait: 10s + group_interval: 5m + repeat_interval: 1h + receiver: unity-webhook + +receivers: + - name: unity-webhook + webhook_configs: + - url: "http://unity-grantmanager-web:80/api/monitoring/alert" + send_resolved: false diff --git a/applications/Unity.GrantManager/scripts/prometheus/prometheus.yml b/applications/Unity.GrantManager/scripts/prometheus/prometheus.yml new file mode 100644 index 0000000000..1acc88d37d --- /dev/null +++ b/applications/Unity.GrantManager/scripts/prometheus/prometheus.yml @@ -0,0 +1,17 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +alerting: + alertmanagers: + - static_configs: + - targets: ["alertmanager:9093"] + +rule_files: + - /etc/prometheus/alert-rules.yml + +scrape_configs: + - job_name: unity-grantmanager + static_configs: + - targets: ["unity-grantmanager-web:80"] + metrics_path: /metrics diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Controllers/Monitoring/AlertPayload.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Controllers/Monitoring/AlertPayload.cs new file mode 100644 index 0000000000..69b3c758d3 --- /dev/null +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Controllers/Monitoring/AlertPayload.cs @@ -0,0 +1,38 @@ +using System; +using System.Collections.Generic; +using System.Text.Json.Serialization; + +namespace Unity.GrantManager.Web.Controllers.Monitoring; + +public class AlertManagerPayload +{ + [JsonPropertyName("receiver")] + public string Receiver { get; set; } = string.Empty; + + [JsonPropertyName("status")] + public string Status { get; set; } = 
string.Empty; + + [JsonPropertyName("alerts")] + public List Alerts { get; set; } = []; +} + +public class AlertItem +{ + [JsonPropertyName("status")] + public string Status { get; set; } = string.Empty; + + [JsonPropertyName("labels")] + public Dictionary Labels { get; set; } = []; + + [JsonPropertyName("annotations")] + public Dictionary Annotations { get; set; } = []; + + [JsonPropertyName("startsAt")] + public DateTimeOffset StartsAt { get; set; } + + [JsonPropertyName("generatorURL")] + public string GeneratorURL { get; set; } = string.Empty; + + [JsonPropertyName("fingerprint")] + public string Fingerprint { get; set; } = string.Empty; +} diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Controllers/Monitoring/AlertWebhookController.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Controllers/Monitoring/AlertWebhookController.cs new file mode 100644 index 0000000000..499e5ab4dc --- /dev/null +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Controllers/Monitoring/AlertWebhookController.cs @@ -0,0 +1,94 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Threading.Tasks; +using Microsoft.AspNetCore.Authorization; +using Microsoft.AspNetCore.Mvc; +using Microsoft.Extensions.Logging; +using Unity.GrantManager.Notifications; +using Unity.Notifications.TeamsNotifications; +using Volo.Abp.AspNetCore.Mvc; + +namespace Unity.GrantManager.Web.Controllers.Monitoring; + +[ApiController] +[Route("api/monitoring")] +[AllowAnonymous] +[IgnoreAntiforgeryToken] +public class AlertWebhookController( + INotificationsAppService notificationsAppService, + ILogger logger) : AbpController +{ + /// + /// Receives Alertmanager webhook payloads and forwards a concise summary to Teams. + /// + [HttpPost("alert")] + public async Task ProcessAlert([FromBody] AlertManagerPayload? 
payload) + { + if (payload is null || !ModelState.IsValid || payload.Alerts.Count == 0) + { + return BadRequest(); + } + + try + { + var firing = payload.Alerts + .Where(a => a.Status.Equals("firing", StringComparison.OrdinalIgnoreCase)) + .ToList(); + + if (firing.Count == 0) + { + return Ok(); + } + + // Use the first (or most severe) alert as the headline + var lead = firing[0]; + string alertName = lead.Labels.GetValueOrDefault("alertname", "Unknown Alert"); + string severity = lead.Labels.GetValueOrDefault("severity", "unknown"); + string summary = lead.Annotations.GetValueOrDefault("summary", alertName); + string description = lead.Annotations.GetValueOrDefault("description", string.Empty); + string @namespace = lead.Labels.GetValueOrDefault("kubernetes_namespace_name", + lead.Labels.GetValueOrDefault("namespace", string.Empty)); + string endpoint = lead.Labels.GetValueOrDefault("handler", + lead.Labels.GetValueOrDefault("endpoint", string.Empty)); + string? envInfo = Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT"); + + string activityTitle = $"[{severity.ToUpperInvariant()}] {summary}"; + string activitySubtitle = $"Environment: {envInfo} | Namespace: {@namespace}"; + + var facts = new List(); + + if (!string.IsNullOrEmpty(description)) + { + facts.Add(new Fact { Name = "Description", Value = description }); + } + + if (firing.Count > 1) + { + facts.Add(new Fact { Name = "Firing alerts", Value = firing.Count.ToString() }); + } + + if (!string.IsNullOrEmpty(endpoint)) + { + facts.Add(new Fact { Name = "Affected endpoint", Value = endpoint }); + } + + facts.Add(new Fact { Name = "First seen", Value = lead.StartsAt.ToString("u") }); + + if (!string.IsNullOrEmpty(lead.GeneratorURL)) + { + facts.Add(new Fact { Name = "Source", Value = lead.GeneratorURL }); + } + + await notificationsAppService.PostToTeamsAsync(activityTitle, activitySubtitle, facts); + + return Ok(); + } + catch (Exception ex) + { + logger.LogError(ex, "Failed to forward alert 
{AlertName} to Teams", + payload.Alerts.FirstOrDefault()?.Labels.GetValueOrDefault("alertname")); + return StatusCode(500); + } + } +} diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs index e6f9d5eb17..79565fc7bf 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs @@ -77,6 +77,7 @@ using Unity.Reporting.Web; using Unity.AI.Web; using Unity.GrantManager.Web.Views.Settings; +using Prometheus; namespace Unity.GrantManager.Web; @@ -588,6 +589,8 @@ public override void OnApplicationInitialization(ApplicationInitializationContex app.UseCorrelationId(); app.UseStaticFiles(); app.UseRouting(); + app.UseHttpMetrics(); + app.MapMetrics(); app.UseAuthentication(); if (MultiTenancyConsts.IsEnabled) diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ErrorCountingLoggerProvider.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ErrorCountingLoggerProvider.cs new file mode 100644 index 0000000000..4fd7a5b9b8 --- /dev/null +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ErrorCountingLoggerProvider.cs @@ -0,0 +1,34 @@ +using System; +using Microsoft.Extensions.Logging; +using Prometheus; +using Serilog.Core; +using Serilog.Events; + +namespace Unity.GrantManager.Web.Middleware; + +/// +/// Shared Prometheus counter for application-level errors. +/// Labelled by log level ("error" / "critical") and exception type (empty when no exception). +/// Implemented as a Serilog ILogEventSink so it works alongside UseSerilog(). 
+/// Register via: .WriteTo.Sink(new ErrorCountingLoggerSink()) +/// +public sealed class ErrorCountingLoggerSink : ILogEventSink +{ + internal static readonly Counter ErrorCounter = + Metrics.CreateCounter( + "application_errors_total", + "Total application errors captured via Serilog", + new CounterConfiguration + { + LabelNames = ["level", "exception"] + }); + + public void Emit(LogEvent logEvent) + { + if (logEvent.Level < LogEventLevel.Error) return; + + string level = logEvent.Level.ToString().ToLowerInvariant(); + string exceptionType = logEvent.Exception?.GetType().Name ?? string.Empty; + ErrorCounter.WithLabels(level, exceptionType).Inc(); + } +} diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionCounterMiddleware.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionCounterMiddleware.cs new file mode 100644 index 0000000000..0550455097 --- /dev/null +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionCounterMiddleware.cs @@ -0,0 +1,74 @@ +using System; +using System.Collections.Generic; +using System.Threading.Tasks; +using Microsoft.AspNetCore.Http; +using Prometheus; +using Unity.GrantManager.Notifications; +using Unity.Notifications.TeamsNotifications; + +namespace Unity.GrantManager.Web.Middleware; + +public class ExceptionCounterMiddleware(RequestDelegate next, INotificationsAppService notificationsAppService) +{ + private static readonly Counter ExceptionCounter = + Metrics.CreateCounter( + "application_exceptions_total", + "Total number of application exceptions", + new CounterConfiguration + { + LabelNames = ["type"] + }); + + public async Task InvokeAsync(HttpContext context) + { + try + { + await next(context); + } + catch (Exception ex) + { + ExceptionCounter.WithLabels(ex.GetType().Name).Inc(); + ErrorCountingLoggerSink.ErrorCounter.WithLabels("critical", ex.GetType().Name).Inc(); + await NotifyTeamsAsync(context, ex); + throw; + } + } + + 
private async Task NotifyTeamsAsync(HttpContext context, Exception ex) + { + try + { + string? env = Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT"); + string endpoint = $"{context.Request.Method} {context.Request.Path}"; + + // Truncate stack trace — Teams message cards have a ~28 KB body limit + string stackTrace = ex.StackTrace ?? "(no stack trace)"; + if (stackTrace.Length > 1500) + { + stackTrace = stackTrace[..1500] + "\n... (truncated)"; + } + + string activityTitle = $"[CRITICAL] {ex.GetType().Name}"; + string activitySubtitle = $"Environment: {env} | {endpoint}"; + + var facts = new List + { + new() { Name = "Exception", Value = ex.GetType().FullName ?? ex.GetType().Name }, + new() { Name = "Message", Value = ex.Message }, + new() { Name = "Endpoint", Value = endpoint }, + new() { Name = "Stack Trace", Value = stackTrace }, + }; + + if (ex.InnerException is not null) + { + facts.Add(new Fact { Name = "Inner Exception", Value = ex.InnerException.Message }); + } + + await notificationsAppService.PostToTeamsAsync(activityTitle, activitySubtitle, facts); + } + catch + { + // Never let a Teams notification failure affect request handling + } + } +} diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Program.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Program.cs index 2f3f11cbd0..0f437e02a3 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Program.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Program.cs @@ -2,9 +2,12 @@ using Microsoft.AspNetCore.Diagnostics.HealthChecks; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Hosting; +using OpenTelemetry.Metrics; +using OpenTelemetry.Trace; using Serilog; using System; using System.Threading.Tasks; +using Unity.GrantManager.Web.Middleware; namespace Unity.GrantManager.Web; @@ -20,13 +23,25 @@ public async static Task Main(string[] args) Console.WriteLine("Starting web host."); var builder = 
WebApplication.CreateBuilder(args); builder.Services.AddHttpContextAccessor(); + builder.Services.AddOpenTelemetry() + .WithTracing(tracing => tracing + .AddAspNetCoreInstrumentation() + .AddHttpClientInstrumentation()) + .WithMetrics(metrics => metrics + .AddAspNetCoreInstrumentation() + .AddHttpClientInstrumentation() + .AddRuntimeInstrumentation()); builder.Host.AddAppSettingsSecretsJson() .UseAutofac() .UseSerilog((hostingContext, loggerConfiguration) => - loggerConfiguration.ReadFrom.Configuration(hostingContext.Configuration)); + loggerConfiguration + .ReadFrom.Configuration(hostingContext.Configuration) + .WriteTo.Sink(new ErrorCountingLoggerSink())); await builder.AddApplicationAsync(); var app = builder.Build(); + app.UseMiddleware(); + app.MapHealthChecks("/healthz/live", new HealthCheckOptions() { Predicate = healthCheck => healthCheck.Tags.Contains("live") }); // Liveness (dumb) app.MapHealthChecks("/healthz/ready", diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Unity.GrantManager.Web.csproj b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Unity.GrantManager.Web.csproj index f44d2fc836..553a7af521 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Unity.GrantManager.Web.csproj +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Unity.GrantManager.Web.csproj @@ -76,6 +76,12 @@ + + + + + + From 865d808091f6751d1f843832573d5464cb8c8bd3 Mon Sep 17 00:00:00 2001 From: JamesPasta Date: Thu, 7 May 2026 13:04:17 -0700 Subject: [PATCH 02/13] feature/AB#32049-Promethious --- .../openshift/alertmanager-config.yaml | 29 ++++++++++++ .../scripts/openshift/prometheus-rule.yaml | 44 +++++++++++++++++++ .../scripts/openshift/service-monitor.yaml | 23 ++++++++++ 3 files changed, 96 insertions(+) create mode 100644 applications/Unity.GrantManager/scripts/openshift/alertmanager-config.yaml create mode 100644 applications/Unity.GrantManager/scripts/openshift/prometheus-rule.yaml create mode 100644 
applications/Unity.GrantManager/scripts/openshift/service-monitor.yaml diff --git a/applications/Unity.GrantManager/scripts/openshift/alertmanager-config.yaml b/applications/Unity.GrantManager/scripts/openshift/alertmanager-config.yaml new file mode 100644 index 0000000000..507a0546ce --- /dev/null +++ b/applications/Unity.GrantManager/scripts/openshift/alertmanager-config.yaml @@ -0,0 +1,29 @@ +# AlertmanagerConfig CRD — routes fired alerts to the app webhook → Teams +# Deploy with: oc apply -f scripts/openshift/alertmanager-config.yaml -n d18498- +# +# Replaces: scripts/prometheus/alertmanager.yml (docker-compose local only) +# +# Prerequisites: +# The cluster Alertmanager must have alertmanagerConfigSelector set to pick up this config. +# On BC Gov Silver this is typically enabled by default in user namespaces. +apiVersion: monitoring.coreos.com/v1alpha1 +kind: AlertmanagerConfig +metadata: + name: unity-grantmanager-alerts + labels: + alertmanagerConfig: unity-grantmanager # must match Alertmanager's alertmanagerConfigSelector +spec: + route: + groupBy: ["alertname", "type"] + groupWait: 10s + groupInterval: 5m + repeatInterval: 1h + receiver: unity-webhook + + receivers: + - name: unity-webhook + webhookConfigs: + - url: "https:///api/monitoring/alert" + # Replace with the OpenShift Route hostname, e.g.: + # unity-grantmanager-web-d18498-test.apps.silver.devops.gov.bc.ca + sendResolved: false diff --git a/applications/Unity.GrantManager/scripts/openshift/prometheus-rule.yaml b/applications/Unity.GrantManager/scripts/openshift/prometheus-rule.yaml new file mode 100644 index 0000000000..d4096abc2a --- /dev/null +++ b/applications/Unity.GrantManager/scripts/openshift/prometheus-rule.yaml @@ -0,0 +1,44 @@ +# PrometheusRule CRD — loaded by the OpenShift cluster Prometheus Operator +# Deploy with: oc apply -f scripts/openshift/prometheus-rule.yaml -n d18498- +# +# Replaces: scripts/prometheus/alert-rules.yml (docker-compose local only) +apiVersion: 
monitoring.coreos.com/v1 +kind: PrometheusRule +metadata: + name: unity-grantmanager-exceptions + labels: + # These labels must match the Prometheus Operator's ruleSelector in your namespace. + # On BC Gov Silver cluster the label below is standard. + role: alert-rules +spec: + groups: + - name: unity-grantmanager-exceptions + rules: + # Fire if any exception type exceeds 5 occurrences in a 5-minute window + - alert: HighExceptionRate + expr: | + increase(application_exceptions_total[5m]) > 5 + for: 1m + labels: + severity: critical + annotations: + summary: "High exception rate in Unity GrantManager" + description: > + Exception type {{ $labels.type }} has fired {{ $value | humanize }} times + in the last 5 minutes (namespace: {{ $labels.namespace }}). + + # Fire if a new exception type appears (catches regressions after deploys) + - alert: NewExceptionType + expr: | + increase(application_exceptions_total[10m]) > 0 + unless ( + increase(application_exceptions_total[10m] offset 10m) > 0 + ) + for: 0m + labels: + severity: warning + annotations: + summary: "New exception type detected in Unity GrantManager" + description: > + A new exception type {{ $labels.type }} appeared for the first time + in the last 10 minutes (namespace: {{ $labels.namespace }}). diff --git a/applications/Unity.GrantManager/scripts/openshift/service-monitor.yaml b/applications/Unity.GrantManager/scripts/openshift/service-monitor.yaml new file mode 100644 index 0000000000..f5428da670 --- /dev/null +++ b/applications/Unity.GrantManager/scripts/openshift/service-monitor.yaml @@ -0,0 +1,23 @@ +# ServiceMonitor CRD — tells the Prometheus Operator how to scrape /metrics from the app +# Deploy with: oc apply -f scripts/openshift/service-monitor.yaml -n d18498- +# +# Replaces: scrape_configs in scripts/prometheus/prometheus.yml (docker-compose local only) +# +# Prerequisites: +# The app Service must exist and expose port 8080 (or 80). +# Adjust 'port' below to match your Service's named port. 
+apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: unity-grantmanager + labels: + app: unity-grantmanager +spec: + selector: + matchLabels: + app: unity-grantmanager # must match labels on your OpenShift Service + endpoints: + - port: http # named port on the Service pointing to 8080 + path: /metrics + interval: 15s + scheme: http From 428fa0b68082667504fd850df941c6024db88f09 Mon Sep 17 00:00:00 2001 From: JamesPasta Date: Thu, 7 May 2026 13:15:47 -0700 Subject: [PATCH 03/13] feature/AB#32049-Promethious --- .../Unity.GrantManager.Web/Unity.GrantManager.Web.csproj | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Unity.GrantManager.Web.csproj b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Unity.GrantManager.Web.csproj index 553a7af521..4d503c0820 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Unity.GrantManager.Web.csproj +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Unity.GrantManager.Web.csproj @@ -78,10 +78,11 @@ - - - - + + + + + From d40bb76892779a1c7f9a546fc9044718199bd9ff Mon Sep 17 00:00:00 2001 From: JamesPasta Date: Thu, 7 May 2026 13:31:52 -0700 Subject: [PATCH 04/13] feature/AB#32049-Promethious --- .../openshift/alertmanager-config.yaml | 29 -------- .../scripts/prometheus/alert-rules.yml | 4 +- .../scripts/prometheus/alertmanager.yml | 2 +- .../scripts/prometheus/prometheus.yml | 2 +- .../Controllers/Monitoring/AlertPayload.cs | 23 ++++++- .../Monitoring/AlertWebhookController.cs | 17 ++++- .../GrantManagerWebModule.cs | 3 +- .../Identity/InternalNetworkRequirement.cs | 69 +++++++++++++++++++ .../Identity/PolicyRegistrant.cs | 5 ++ .../src/Unity.GrantManager.Web/Program.cs | 6 +- 10 files changed, 117 insertions(+), 43 deletions(-) create mode 100644 applications/Unity.GrantManager/src/Unity.GrantManager.Web/Identity/InternalNetworkRequirement.cs diff --git 
a/applications/Unity.GrantManager/scripts/openshift/alertmanager-config.yaml b/applications/Unity.GrantManager/scripts/openshift/alertmanager-config.yaml index 507a0546ce..e69de29bb2 100644 --- a/applications/Unity.GrantManager/scripts/openshift/alertmanager-config.yaml +++ b/applications/Unity.GrantManager/scripts/openshift/alertmanager-config.yaml @@ -1,29 +0,0 @@ -# AlertmanagerConfig CRD — routes fired alerts to the app webhook → Teams -# Deploy with: oc apply -f scripts/openshift/alertmanager-config.yaml -n d18498- -# -# Replaces: scripts/prometheus/alertmanager.yml (docker-compose local only) -# -# Prerequisites: -# The cluster Alertmanager must have alertmanagerConfigSelector set to pick up this config. -# On BC Gov Silver this is typically enabled by default in user namespaces. -apiVersion: monitoring.coreos.com/v1alpha1 -kind: AlertmanagerConfig -metadata: - name: unity-grantmanager-alerts - labels: - alertmanagerConfig: unity-grantmanager # must match Alertmanager's alertmanagerConfigSelector -spec: - route: - groupBy: ["alertname", "type"] - groupWait: 10s - groupInterval: 5m - repeatInterval: 1h - receiver: unity-webhook - - receivers: - - name: unity-webhook - webhookConfigs: - - url: "https:///api/monitoring/alert" - # Replace with the OpenShift Route hostname, e.g.: - # unity-grantmanager-web-d18498-test.apps.silver.devops.gov.bc.ca - sendResolved: false diff --git a/applications/Unity.GrantManager/scripts/prometheus/alert-rules.yml b/applications/Unity.GrantManager/scripts/prometheus/alert-rules.yml index b53ad823d8..c8ec1ad6e6 100644 --- a/applications/Unity.GrantManager/scripts/prometheus/alert-rules.yml +++ b/applications/Unity.GrantManager/scripts/prometheus/alert-rules.yml @@ -12,9 +12,7 @@ groups: summary: "High exception rate in Unity GrantManager" description: > Exception type {{ $labels.type }} has fired {{ $value | humanize }} times - in the last 5 minutes (namespace: {{ $labels.kubernetes_namespace_name }}). 
- - # Fire if any new exception type appears (catches regressions) + in the last 5 minutes (job: {{ $labels.job }}, instance: {{ $labels.instance }}). - alert: NewExceptionType expr: | increase(application_exceptions_total[10m]) > 0 diff --git a/applications/Unity.GrantManager/scripts/prometheus/alertmanager.yml b/applications/Unity.GrantManager/scripts/prometheus/alertmanager.yml index 14af60fdd9..f313fbf3b7 100644 --- a/applications/Unity.GrantManager/scripts/prometheus/alertmanager.yml +++ b/applications/Unity.GrantManager/scripts/prometheus/alertmanager.yml @@ -11,5 +11,5 @@ route: receivers: - name: unity-webhook webhook_configs: - - url: "http://unity-grantmanager-web:80/api/monitoring/alert" + - url: "http://unity-grantmanager-web:8080/api/monitoring/alert" send_resolved: false diff --git a/applications/Unity.GrantManager/scripts/prometheus/prometheus.yml b/applications/Unity.GrantManager/scripts/prometheus/prometheus.yml index 1acc88d37d..4bc80b2be4 100644 --- a/applications/Unity.GrantManager/scripts/prometheus/prometheus.yml +++ b/applications/Unity.GrantManager/scripts/prometheus/prometheus.yml @@ -13,5 +13,5 @@ rule_files: scrape_configs: - job_name: unity-grantmanager static_configs: - - targets: ["unity-grantmanager-web:80"] + - targets: ["unity-grantmanager-web:8080"] metrics_path: /metrics diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Controllers/Monitoring/AlertPayload.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Controllers/Monitoring/AlertPayload.cs index 69b3c758d3..75a9c2bfa0 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Controllers/Monitoring/AlertPayload.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Controllers/Monitoring/AlertPayload.cs @@ -12,8 +12,14 @@ public class AlertManagerPayload [JsonPropertyName("status")] public string Status { get; set; } = string.Empty; + private List _alerts = []; + [JsonPropertyName("alerts")] - public List Alerts { 
get; set; } = []; + public List Alerts + { + get => _alerts; + set => _alerts = value ?? []; + } } public class AlertItem @@ -21,11 +27,22 @@ public class AlertItem [JsonPropertyName("status")] public string Status { get; set; } = string.Empty; + private Dictionary _labels = []; + private Dictionary _annotations = []; + [JsonPropertyName("labels")] - public Dictionary Labels { get; set; } = []; + public Dictionary Labels + { + get => _labels; + set => _labels = value ?? []; + } [JsonPropertyName("annotations")] - public Dictionary Annotations { get; set; } = []; + public Dictionary Annotations + { + get => _annotations; + set => _annotations = value ?? []; + } [JsonPropertyName("startsAt")] public DateTimeOffset StartsAt { get; set; } diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Controllers/Monitoring/AlertWebhookController.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Controllers/Monitoring/AlertWebhookController.cs index 499e5ab4dc..6d655ca17d 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Controllers/Monitoring/AlertWebhookController.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Controllers/Monitoring/AlertWebhookController.cs @@ -33,7 +33,7 @@ public async Task ProcessAlert([FromBody] AlertManagerPayload? pa try { var firing = payload.Alerts - .Where(a => a.Status.Equals("firing", StringComparison.OrdinalIgnoreCase)) + .Where(a => a is not null && a.Status.Equals("firing", StringComparison.OrdinalIgnoreCase)) .ToList(); if (firing.Count == 0) @@ -41,8 +41,10 @@ public async Task ProcessAlert([FromBody] AlertManagerPayload? 
pa return Ok(); } - // Use the first (or most severe) alert as the headline - var lead = firing[0]; + // Pick the most severe alert as the headline (critical > warning > info > unknown) + var lead = firing + .OrderBy(a => SeverityOrder(a.Labels.GetValueOrDefault("severity", "unknown"))) + .First(); string alertName = lead.Labels.GetValueOrDefault("alertname", "Unknown Alert"); string severity = lead.Labels.GetValueOrDefault("severity", "unknown"); string summary = lead.Annotations.GetValueOrDefault("summary", alertName); @@ -91,4 +93,13 @@ public async Task ProcessAlert([FromBody] AlertManagerPayload? pa return StatusCode(500); } } + + private static int SeverityOrder(string severity) => severity.ToLowerInvariant() switch + { + "critical" => 0, + "error" => 1, + "warning" => 2, + "info" => 3, + _ => 4 + }; } diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs index 79565fc7bf..5b6a2d1b38 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs @@ -277,6 +277,7 @@ private static void ConfgureFormsApiAuhentication(ServiceConfigurationContext co private static void ConfigurePolicies(ServiceConfigurationContext context) { + context.Services.AddScoped(); PolicyRegistrant.Register(context); } @@ -590,7 +591,6 @@ public override void OnApplicationInitialization(ApplicationInitializationContex app.UseStaticFiles(); app.UseRouting(); app.UseHttpMetrics(); - app.MapMetrics(); app.UseAuthentication(); if (MultiTenancyConsts.IsEnabled) @@ -600,6 +600,7 @@ public override void OnApplicationInitialization(ApplicationInitializationContex app.UseUnitOfWork(); app.UseAuthorization(); + app.MapMetrics().RequireAuthorization(Unity.GrantManager.Web.Identity.Policy.PolicyRegistrant.MetricsAccessPolicy); if 
(IsProfilingAllowed(env, configuration)) { app.UseMiniProfiler(); diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Identity/InternalNetworkRequirement.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Identity/InternalNetworkRequirement.cs new file mode 100644 index 0000000000..9ba56bdf96 --- /dev/null +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Identity/InternalNetworkRequirement.cs @@ -0,0 +1,69 @@ +using System.Net; +using System.Net.Sockets; +using System.Threading.Tasks; +using Microsoft.AspNetCore.Authorization; +using Microsoft.AspNetCore.Http; + +namespace Unity.GrantManager.Web.Identity; + +/// +/// Allows access to /metrics only from loopback or RFC-1918 private addresses. +/// This permits Prometheus to scrape pod-to-pod within the OpenShift cluster +/// while blocking external callers. +/// +public class InternalNetworkRequirement : IAuthorizationRequirement { } + +public class InternalNetworkHandler(IHttpContextAccessor httpContextAccessor) + : AuthorizationHandler +{ + protected override Task HandleRequirementAsync( + AuthorizationHandlerContext context, + InternalNetworkRequirement requirement) + { + var remoteIp = httpContextAccessor.HttpContext?.Connection.RemoteIpAddress; + + if (remoteIp is null) + { + context.Fail(); + return Task.CompletedTask; + } + + // Map IPv4-in-IPv6 (::ffff:x.x.x.x) back to IPv4 for range checks + if (remoteIp.IsIPv4MappedToIPv6) + { + remoteIp = remoteIp.MapToIPv4(); + } + + if (IsAllowed(remoteIp)) + { + context.Succeed(requirement); + } + else + { + context.Fail(); + } + + return Task.CompletedTask; + } + + private static bool IsAllowed(IPAddress ip) + { + if (IPAddress.IsLoopback(ip)) return true; + + if (ip.AddressFamily == AddressFamily.InterNetwork) + { + byte[] bytes = ip.GetAddressBytes(); + + // 10.0.0.0/8 + if (bytes[0] == 10) return true; + + // 172.16.0.0/12 + if (bytes[0] == 172 && bytes[1] >= 16 && bytes[1] <= 31) return true; + + // 192.168.0.0/16 + 
if (bytes[0] == 192 && bytes[1] == 168) return true; + } + + return false; + } +} diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Identity/PolicyRegistrant.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Identity/PolicyRegistrant.cs index 10e9446c70..ea05317e48 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Identity/PolicyRegistrant.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Identity/PolicyRegistrant.cs @@ -12,12 +12,17 @@ namespace Unity.GrantManager.Web.Identity.Policy; internal static class PolicyRegistrant { internal const string PermissionConstant = "Permission"; + internal const string MetricsAccessPolicy = "MetricsAccess"; internal static void Register(ServiceConfigurationContext context) { // Using AddAuthorizationBuilder to register authorization services and construct policies var authorizationBuilder = context.Services.AddAuthorizationBuilder(); + // Metrics endpoint — allow only loopback / RFC-1918 (cluster-internal) callers + authorizationBuilder.AddPolicy(MetricsAccessPolicy, + policy => policy.AddRequirements(new InternalNetworkRequirement())); + // Identity Role Policies authorizationBuilder.AddPolicy(IdentityPermissions.Roles.Default, policy => policy.RequireClaim(PermissionConstant, IdentityPermissions.Roles.Default)); diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Program.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Program.cs index 0f437e02a3..8920d5f3ee 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Program.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Program.cs @@ -26,11 +26,13 @@ public async static Task Main(string[] args) builder.Services.AddOpenTelemetry() .WithTracing(tracing => tracing .AddAspNetCoreInstrumentation() - .AddHttpClientInstrumentation()) + .AddHttpClientInstrumentation() + .AddOtlpExporter()) .WithMetrics(metrics => metrics 
.AddAspNetCoreInstrumentation() .AddHttpClientInstrumentation() - .AddRuntimeInstrumentation()); + .AddRuntimeInstrumentation() + .AddOtlpExporter()); builder.Host.AddAppSettingsSecretsJson() .UseAutofac() .UseSerilog((hostingContext, loggerConfiguration) => From da8652c982f6caedf737e009da078e764826ffa4 Mon Sep 17 00:00:00 2001 From: JamesPasta Date: Thu, 7 May 2026 13:40:29 -0700 Subject: [PATCH 05/13] feature/AB#32049-Promethious --- .../src/Unity.GrantManager.Web/GrantManagerWebModule.cs | 6 +++++- .../src/Unity.GrantManager.Web/Program.cs | 2 -- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs index 5b6a2d1b38..eb77d16259 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs @@ -589,6 +589,7 @@ public override void OnApplicationInitialization(ApplicationInitializationContex app.UseCorrelationId(); app.UseStaticFiles(); + app.UseMiddleware(); app.UseRouting(); app.UseHttpMetrics(); app.UseAuthentication(); @@ -600,7 +601,10 @@ public override void OnApplicationInitialization(ApplicationInitializationContex app.UseUnitOfWork(); app.UseAuthorization(); - app.MapMetrics().RequireAuthorization(Unity.GrantManager.Web.Identity.Policy.PolicyRegistrant.MetricsAccessPolicy); + app.UseEndpoints(endpoints => + { + endpoints.MapMetrics().RequireAuthorization(Unity.GrantManager.Web.Identity.Policy.PolicyRegistrant.MetricsAccessPolicy); + }); if (IsProfilingAllowed(env, configuration)) { app.UseMiniProfiler(); diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Program.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Program.cs index 8920d5f3ee..6debc33b41 100644 --- 
a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Program.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Program.cs @@ -42,8 +42,6 @@ public async static Task Main(string[] args) await builder.AddApplicationAsync(); var app = builder.Build(); - app.UseMiddleware(); - app.MapHealthChecks("/healthz/live", new HealthCheckOptions() { Predicate = healthCheck => healthCheck.Tags.Contains("live") }); // Liveness (dumb) app.MapHealthChecks("/healthz/ready", From a9b63d03881be49bd06a7dce85ef8830b400683a Mon Sep 17 00:00:00 2001 From: JamesPasta Date: Thu, 7 May 2026 16:35:48 -0700 Subject: [PATCH 06/13] feature/AB#32049-Promethious --- .../Monitoring/AlertWebhookController.cs | 6 +- .../GrantManagerWebModule.cs | 3 +- .../Identity/InternalNetworkRequirement.cs | 2 +- ...Provider.cs => ErrorCountingLoggerSink.cs} | 4 +- .../Middleware/ExceptionCounterMiddleware.cs | 95 +++++++++----- .../ExceptionNotificationThrottle.cs | 58 +++++++++ .../Unity.GrantManager.Web.csproj | 10 +- .../Identity/InternalNetworkHandlerTests.cs | 118 ++++++++++++++++++ 8 files changed, 253 insertions(+), 43 deletions(-) rename applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/{ErrorCountingLoggerProvider.cs => ErrorCountingLoggerSink.cs} (87%) create mode 100644 applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionNotificationThrottle.cs create mode 100644 applications/Unity.GrantManager/test/Unity.GrantManager.Web.Tests/Identity/InternalNetworkHandlerTests.cs diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Controllers/Monitoring/AlertWebhookController.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Controllers/Monitoring/AlertWebhookController.cs index 6d655ca17d..cf49e053d9 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Controllers/Monitoring/AlertWebhookController.cs +++ 
b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Controllers/Monitoring/AlertWebhookController.cs @@ -33,7 +33,7 @@ public async Task ProcessAlert([FromBody] AlertManagerPayload? pa try { var firing = payload.Alerts - .Where(a => a is not null && a.Status.Equals("firing", StringComparison.OrdinalIgnoreCase)) + .Where(a => a is not null && string.Equals(a.Status, "firing", StringComparison.OrdinalIgnoreCase)) .ToList(); if (firing.Count == 0) @@ -89,12 +89,12 @@ public async Task ProcessAlert([FromBody] AlertManagerPayload? pa catch (Exception ex) { logger.LogError(ex, "Failed to forward alert {AlertName} to Teams", - payload.Alerts.FirstOrDefault()?.Labels.GetValueOrDefault("alertname")); + payload.Alerts.FirstOrDefault()?.Labels?.GetValueOrDefault("alertname")); return StatusCode(500); } } - private static int SeverityOrder(string severity) => severity.ToLowerInvariant() switch + private static int SeverityOrder(string? severity) => severity?.ToLowerInvariant() switch { "critical" => 0, "error" => 1, diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs index eb77d16259..747e6cbb9f 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs @@ -277,7 +277,8 @@ private static void ConfgureFormsApiAuhentication(ServiceConfigurationContext co private static void ConfigurePolicies(ServiceConfigurationContext context) { - context.Services.AddScoped(); + context.Services.AddScoped(); + context.Services.AddSingleton(); PolicyRegistrant.Register(context); } diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Identity/InternalNetworkRequirement.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Identity/InternalNetworkRequirement.cs index 9ba56bdf96..ade7147bca 100644 --- 
a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Identity/InternalNetworkRequirement.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Identity/InternalNetworkRequirement.cs @@ -4,7 +4,7 @@ using Microsoft.AspNetCore.Authorization; using Microsoft.AspNetCore.Http; -namespace Unity.GrantManager.Web.Identity; +namespace Unity.GrantManager.Web.Identity.Policy; /// /// Allows access to /metrics only from loopback or RFC-1918 private addresses. diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ErrorCountingLoggerProvider.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ErrorCountingLoggerSink.cs similarity index 87% rename from applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ErrorCountingLoggerProvider.cs rename to applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ErrorCountingLoggerSink.cs index 4fd7a5b9b8..e7380fb093 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ErrorCountingLoggerProvider.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ErrorCountingLoggerSink.cs @@ -1,5 +1,3 @@ -using System; -using Microsoft.Extensions.Logging; using Prometheus; using Serilog.Core; using Serilog.Events; @@ -8,7 +6,7 @@ namespace Unity.GrantManager.Web.Middleware; /// /// Shared Prometheus counter for application-level errors. -/// Labelled by log level ("error" / "critical") and exception type (empty when no exception). +/// Labelled by log level ("error" / "fatal") and exception type (empty when no exception). /// Implemented as a Serilog ILogEventSink so it works alongside UseSerilog(). 
/// Register via: .WriteTo.Sink(new ErrorCountingLoggerSink()) /// diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionCounterMiddleware.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionCounterMiddleware.cs index 0550455097..e9a33146eb 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionCounterMiddleware.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionCounterMiddleware.cs @@ -2,14 +2,23 @@ using System.Collections.Generic; using System.Threading.Tasks; using Microsoft.AspNetCore.Http; +using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Logging; using Prometheus; using Unity.GrantManager.Notifications; using Unity.Notifications.TeamsNotifications; namespace Unity.GrantManager.Web.Middleware; -public class ExceptionCounterMiddleware(RequestDelegate next, INotificationsAppService notificationsAppService) +public class ExceptionCounterMiddleware( + RequestDelegate next, + ExceptionNotificationThrottle throttle, + ILogger logger) { + // Notify only in these environments; add "Staging" if desired + private static readonly HashSet NotifyEnvironments = + new(StringComparer.OrdinalIgnoreCase) { "Production" }; + private static readonly Counter ExceptionCounter = Metrics.CreateCounter( "application_exceptions_total", @@ -28,47 +37,73 @@ public async Task InvokeAsync(HttpContext context) catch (Exception ex) { ExceptionCounter.WithLabels(ex.GetType().Name).Inc(); - ErrorCountingLoggerSink.ErrorCounter.WithLabels("critical", ex.GetType().Name).Inc(); - await NotifyTeamsAsync(context, ex); + ErrorCountingLoggerSink.ErrorCounter.WithLabels("fatal", ex.GetType().Name).Inc(); + + QueueTeamsNotification(context, ex); + throw; } } - private async Task NotifyTeamsAsync(HttpContext context, Exception ex) + private void QueueTeamsNotification(HttpContext context, Exception ex) { - try + string? 
env = Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT"); + + if (!NotifyEnvironments.Contains(env ?? string.Empty)) { - string? env = Environment.GetEnvironmentVariable("ASPNETCORE_ENVIRONMENT"); - string endpoint = $"{context.Request.Method} {context.Request.Path}"; + return; + } - // Truncate stack trace — Teams message cards have a ~28 KB body limit - string stackTrace = ex.StackTrace ?? "(no stack trace)"; - if (stackTrace.Length > 1500) - { - stackTrace = stackTrace[..1500] + "\n... (truncated)"; - } + if (!throttle.ShouldNotify(ex.GetType().Name)) + { + return; + } - string activityTitle = $"[CRITICAL] {ex.GetType().Name}"; - string activitySubtitle = $"Environment: {env} | {endpoint}"; + // Capture values from the request context before it is disposed + string endpoint = $"{context.Request.Method} {context.Request.Path}"; + string exTypeName = ex.GetType().FullName ?? ex.GetType().Name; + string exMessage = ex.Message; + string innerMessage = ex.InnerException?.Message ?? string.Empty; + string stackTrace = ex.StackTrace ?? "(no stack trace)"; + if (stackTrace.Length > 1500) + { + stackTrace = stackTrace[..1500] + "\n... (truncated)"; + } - var facts = new List + // Resolve a scoped INotificationsAppService from a fresh DI scope so + // we can safely use it after the request scope has ended + var scopeFactory = context.RequestServices.GetRequiredService(); + + _ = Task.Run(async () => + { + try { - new() { Name = "Exception", Value = ex.GetType().FullName ?? 
ex.GetType().Name }, - new() { Name = "Message", Value = ex.Message }, - new() { Name = "Endpoint", Value = endpoint }, - new() { Name = "Stack Trace", Value = stackTrace }, - }; + await using var scope = scopeFactory.CreateAsyncScope(); + var notifications = scope.ServiceProvider.GetRequiredService(); + + string activityTitle = $"[CRITICAL] {ex.GetType().Name}"; + string activitySubtitle = $"Environment: {env} | {endpoint}"; - if (ex.InnerException is not null) + var facts = new List + { + new() { Name = "Exception", Value = exTypeName }, + new() { Name = "Message", Value = exMessage }, + new() { Name = "Endpoint", Value = endpoint }, + new() { Name = "Stack Trace", Value = stackTrace }, + }; + + if (!string.IsNullOrEmpty(innerMessage)) + { + facts.Add(new Fact { Name = "Inner Exception", Value = innerMessage }); + } + + await notifications.PostToTeamsAsync(activityTitle, activitySubtitle, facts); + } + catch (Exception notifyEx) { - facts.Add(new Fact { Name = "Inner Exception", Value = ex.InnerException.Message }); + logger.LogWarning(notifyEx, "Failed to send Teams exception notification"); } - - await notificationsAppService.PostToTeamsAsync(activityTitle, activitySubtitle, facts); - } - catch - { - // Never let a Teams notification failure affect request handling - } + }); } } + diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionNotificationThrottle.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionNotificationThrottle.cs new file mode 100644 index 0000000000..1c6e1d2422 --- /dev/null +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionNotificationThrottle.cs @@ -0,0 +1,58 @@ +using System; +using System.Collections.Concurrent; +using System.Threading; + +namespace Unity.GrantManager.Web.Middleware; + +/// +/// Singleton that tracks per-exception-type cooldowns and a global rate limit +/// to prevent Teams notification storms during an outage. 
+/// +public sealed class ExceptionNotificationThrottle +{ + // Only send one notification per exception type per cooldown window + private static readonly TimeSpan PerTypeCooldown = TimeSpan.FromMinutes(5); + + // Global cap: at most N notifications per rolling minute across all types + private const int GlobalMaxPerMinute = 5; + + private readonly ConcurrentDictionary _lastSent = new(); + private int _sentThisMinute; + private DateTimeOffset _windowStart = DateTimeOffset.UtcNow; + + /// + /// Returns true if a Teams notification should be sent for this exception type. + /// Thread-safe. + /// + public bool ShouldNotify(string exceptionTypeName) + { + ResetWindowIfNeeded(); + + if (_sentThisMinute >= GlobalMaxPerMinute) + { + return false; + } + + var now = DateTimeOffset.UtcNow; + + if (_lastSent.TryGetValue(exceptionTypeName, out var last) && + now - last < PerTypeCooldown) + { + return false; + } + + _lastSent[exceptionTypeName] = now; + Interlocked.Increment(ref _sentThisMinute); + return true; + } + + private void ResetWindowIfNeeded() + { + var now = DateTimeOffset.UtcNow; + if (now - _windowStart >= TimeSpan.FromMinutes(1)) + { + Interlocked.Exchange(ref _sentThisMinute, 0); + _windowStart = now; + } + } +} diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Unity.GrantManager.Web.csproj b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Unity.GrantManager.Web.csproj index 4d503c0820..ea3f46bc86 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Unity.GrantManager.Web.csproj +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Unity.GrantManager.Web.csproj @@ -78,11 +78,11 @@ - - - - - + + + + + diff --git a/applications/Unity.GrantManager/test/Unity.GrantManager.Web.Tests/Identity/InternalNetworkHandlerTests.cs b/applications/Unity.GrantManager/test/Unity.GrantManager.Web.Tests/Identity/InternalNetworkHandlerTests.cs new file mode 100644 index 0000000000..0351767ab3 --- /dev/null +++ 
b/applications/Unity.GrantManager/test/Unity.GrantManager.Web.Tests/Identity/InternalNetworkHandlerTests.cs @@ -0,0 +1,118 @@ +using System.Net; +using System.Security.Claims; +using System.Threading.Tasks; +using Microsoft.AspNetCore.Authorization; +using Microsoft.AspNetCore.Http; +using NSubstitute; +using Shouldly; +using Unity.GrantManager.Web.Identity.Policy; +using Xunit; + +namespace Unity.GrantManager.Identity; + +public class InternalNetworkHandlerTests +{ + private static Task BuildContextAsync(IPAddress remoteIp) + { + var httpContext = new DefaultHttpContext(); + httpContext.Connection.RemoteIpAddress = remoteIp; + + var httpContextAccessor = Substitute.For(); + httpContextAccessor.HttpContext.Returns(httpContext); + + var requirement = new InternalNetworkRequirement(); + var authContext = new AuthorizationHandlerContext( + [requirement], + new ClaimsPrincipal(), + null); + + var handler = new InternalNetworkHandler(httpContextAccessor); + return handler.HandleAsync(authContext).ContinueWith(_ => authContext); + } + + [Theory] + [InlineData("127.0.0.1")] // IPv4 loopback + [InlineData("::1")] // IPv6 loopback + [InlineData("10.0.0.1")] // 10/8 start + [InlineData("10.255.255.255")] // 10/8 end + [InlineData("172.16.0.1")] // 172.16/12 start + [InlineData("172.31.255.255")] // 172.16/12 end + [InlineData("192.168.0.1")] // 192.168/16 start + [InlineData("192.168.255.255")] // 192.168/16 end + public async Task Allows_InternalAddresses(string ip) + { + var ctx = await BuildContextAsync(IPAddress.Parse(ip)); + ctx.HasSucceeded.ShouldBeTrue($"{ip} should be allowed"); + } + + [Theory] + [InlineData("8.8.8.8")] // public internet + [InlineData("172.15.255.255")] // just below 172.16/12 + [InlineData("172.32.0.0")] // just above 172.16/12 + [InlineData("192.167.255.255")] // just below 192.168/16 + [InlineData("192.169.0.0")] // just above 192.168/16 + [InlineData("11.0.0.0")] // not 10/8 + [InlineData("203.0.113.1")] // TEST-NET-3 (documentation range) + 
public async Task Denies_ExternalAddresses(string ip) + { + var ctx = await BuildContextAsync(IPAddress.Parse(ip)); + ctx.HasSucceeded.ShouldBeFalse($"{ip} should be denied"); + } + + [Fact] + public async Task Allows_IPv4MappedToIPv6_Loopback() + { + // ::ffff:127.0.0.1 — loopback mapped into IPv6 + var ip = IPAddress.Parse("::ffff:127.0.0.1"); + var ctx = await BuildContextAsync(ip); + ctx.HasSucceeded.ShouldBeTrue("IPv4-mapped loopback should be allowed"); + } + + [Fact] + public async Task Allows_IPv4MappedToIPv6_PrivateRange() + { + // ::ffff:10.0.0.1 — private range mapped into IPv6 + var ip = IPAddress.Parse("::ffff:10.0.0.1"); + var ctx = await BuildContextAsync(ip); + ctx.HasSucceeded.ShouldBeTrue("IPv4-mapped private address should be allowed"); + } + + [Fact] + public async Task Denies_NullRemoteIp() + { + var httpContext = new DefaultHttpContext(); + // RemoteIpAddress is null by default on DefaultHttpContext + + var httpContextAccessor = Substitute.For(); + httpContextAccessor.HttpContext.Returns(httpContext); + + var requirement = new InternalNetworkRequirement(); + var authContext = new AuthorizationHandlerContext( + [requirement], + new ClaimsPrincipal(), + null); + + var handler = new InternalNetworkHandler(httpContextAccessor); + await handler.HandleAsync(authContext); + + authContext.HasSucceeded.ShouldBeFalse("null remote IP should be denied"); + } + + [Fact] + public async Task Denies_NullHttpContext() + { + var httpContextAccessor = Substitute.For(); + httpContextAccessor.HttpContext.Returns((HttpContext?)null); + + var requirement = new InternalNetworkRequirement(); + var authContext = new AuthorizationHandlerContext( + [requirement], + new ClaimsPrincipal(), + null); + + var handler = new InternalNetworkHandler(httpContextAccessor); + await handler.HandleAsync(authContext); + + authContext.HasSucceeded.ShouldBeFalse("null HttpContext should be denied"); + } +} From 2ea577204bc670f41a2f49d800f26c16051e498b Mon Sep 17 00:00:00 2001 From: 
JamesPasta Date: Thu, 7 May 2026 16:45:33 -0700 Subject: [PATCH 07/13] feature/AB#32049-Promethious --- .../Monitoring/AlertWebhookController.cs | 2 +- .../GrantManagerWebModule.cs | 21 ++++++++++++++++ .../src/Unity.GrantManager.Web/Program.cs | 25 ++++++++++++------- 3 files changed, 38 insertions(+), 10 deletions(-) diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Controllers/Monitoring/AlertWebhookController.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Controllers/Monitoring/AlertWebhookController.cs index cf49e053d9..ea32f890c5 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Controllers/Monitoring/AlertWebhookController.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Controllers/Monitoring/AlertWebhookController.cs @@ -41,7 +41,7 @@ public async Task ProcessAlert([FromBody] AlertManagerPayload? pa return Ok(); } - // Pick the most severe alert as the headline (critical > warning > info > unknown) + // Pick the most severe alert as the headline (critical > error > warning > info > unknown) var lead = firing .OrderBy(a => SeverityOrder(a.Labels.GetValueOrDefault("severity", "unknown"))) .First(); diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs index 747e6cbb9f..c3eb88b711 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs @@ -5,6 +5,7 @@ using Microsoft.AspNetCore.CookiePolicy; using Microsoft.AspNetCore.Hosting; using Microsoft.AspNetCore.Http; +using Microsoft.AspNetCore.HttpOverrides; using Microsoft.AspNetCore.Localization; using Microsoft.Extensions.Configuration; using Microsoft.Extensions.DependencyInjection; @@ -150,6 +151,22 @@ public override void ConfigureServices(ServiceConfigurationContext context) 
ConfigureDataProtection(context, configuration); ConfigureMiniProfiler(context, configuration); + // Trust X-Forwarded-For only from internal RFC-1918 proxies (OpenShift HAProxy router). + // This ensures RemoteIpAddress reflects the real client IP so the + // InternalNetworkHandler correctly blocks external callers reaching /metrics via ingress. + context.Services.Configure(options => + { + options.ForwardedHeaders = ForwardedHeaders.XForwardedFor | ForwardedHeaders.XForwardedProto; + options.ForwardLimit = 1; + // Clear defaults and allow the three RFC-1918 blocks plus loopback. + options.KnownProxies.Clear(); + options.KnownIPNetworks.Clear(); + options.KnownIPNetworks.Add(System.Net.IPNetwork.Parse("127.0.0.0/8")); + options.KnownIPNetworks.Add(System.Net.IPNetwork.Parse("10.0.0.0/8")); + options.KnownIPNetworks.Add(System.Net.IPNetwork.Parse("172.16.0.0/12")); + options.KnownIPNetworks.Add(System.Net.IPNetwork.Parse("192.168.0.0/16")); + }); + Configure(options => { options.TokenCookie.Expiration = TimeSpan.FromDays(365); @@ -558,6 +575,10 @@ public override void OnApplicationInitialization(ApplicationInitializationContex IdentityModelEventSource.ShowPII = true; } + // Rewrite RemoteIpAddress from X-Forwarded-For before any IP-based checks run. + // Trusted networks are configured in ConfigureServices above. 
+ app.UseForwardedHeaders(); + app.UseAbpRequestLocalization(); if (env.IsProduction() || env.IsStaging()) diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Program.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Program.cs index 6debc33b41..62474357f4 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Program.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Program.cs @@ -23,16 +23,23 @@ public async static Task Main(string[] args) Console.WriteLine("Starting web host."); var builder = WebApplication.CreateBuilder(args); builder.Services.AddHttpContextAccessor(); + bool otlpEnabled = !string.IsNullOrWhiteSpace( + Environment.GetEnvironmentVariable("OTEL_EXPORTER_OTLP_ENDPOINT")); + builder.Services.AddOpenTelemetry() - .WithTracing(tracing => tracing - .AddAspNetCoreInstrumentation() - .AddHttpClientInstrumentation() - .AddOtlpExporter()) - .WithMetrics(metrics => metrics - .AddAspNetCoreInstrumentation() - .AddHttpClientInstrumentation() - .AddRuntimeInstrumentation() - .AddOtlpExporter()); + .WithTracing(tracing => + { + tracing.AddAspNetCoreInstrumentation() + .AddHttpClientInstrumentation(); + if (otlpEnabled) tracing.AddOtlpExporter(); + }) + .WithMetrics(metrics => + { + metrics.AddAspNetCoreInstrumentation() + .AddHttpClientInstrumentation() + .AddRuntimeInstrumentation(); + if (otlpEnabled) metrics.AddOtlpExporter(); + }); builder.Host.AddAppSettingsSecretsJson() .UseAutofac() .UseSerilog((hostingContext, loggerConfiguration) => From c96ea116a822d15c795fa31a3a77596f3b7de664 Mon Sep 17 00:00:00 2001 From: JamesPasta Date: Tue, 12 May 2026 09:17:22 -0700 Subject: [PATCH 08/13] feature/AB#32049-Prometheus --- .../src/Unity.GrantManager.Web/Program.cs | 19 ------------------- .../Unity.GrantManager.Web.csproj | 6 +----- 2 files changed, 1 insertion(+), 24 deletions(-) diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Program.cs 
b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Program.cs index 62474357f4..f4e92e7a2e 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Program.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Program.cs @@ -2,8 +2,6 @@ using Microsoft.AspNetCore.Diagnostics.HealthChecks; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Hosting; -using OpenTelemetry.Metrics; -using OpenTelemetry.Trace; using Serilog; using System; using System.Threading.Tasks; @@ -23,23 +21,6 @@ public async static Task Main(string[] args) Console.WriteLine("Starting web host."); var builder = WebApplication.CreateBuilder(args); builder.Services.AddHttpContextAccessor(); - bool otlpEnabled = !string.IsNullOrWhiteSpace( - Environment.GetEnvironmentVariable("OTEL_EXPORTER_OTLP_ENDPOINT")); - - builder.Services.AddOpenTelemetry() - .WithTracing(tracing => - { - tracing.AddAspNetCoreInstrumentation() - .AddHttpClientInstrumentation(); - if (otlpEnabled) tracing.AddOtlpExporter(); - }) - .WithMetrics(metrics => - { - metrics.AddAspNetCoreInstrumentation() - .AddHttpClientInstrumentation() - .AddRuntimeInstrumentation(); - if (otlpEnabled) metrics.AddOtlpExporter(); - }); builder.Host.AddAppSettingsSecretsJson() .UseAutofac() .UseSerilog((hostingContext, loggerConfiguration) => diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Unity.GrantManager.Web.csproj b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Unity.GrantManager.Web.csproj index ea3f46bc86..cc87311b1a 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Unity.GrantManager.Web.csproj +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Unity.GrantManager.Web.csproj @@ -78,11 +78,7 @@ - - - - - + From ec4bf5dabe8a42c206b9079ec926ebfb0c519c27 Mon Sep 17 00:00:00 2001 From: JamesPasta Date: Tue, 12 May 2026 09:49:33 -0700 Subject: [PATCH 09/13] feature/AB#32049-Prometheus --- 
.../src/Unity.GrantManager.Web/GrantManagerWebModule.cs | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs index c3eb88b711..72736b4ccf 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs @@ -623,10 +623,6 @@ public override void OnApplicationInitialization(ApplicationInitializationContex app.UseUnitOfWork(); app.UseAuthorization(); - app.UseEndpoints(endpoints => - { - endpoints.MapMetrics().RequireAuthorization(Unity.GrantManager.Web.Identity.Policy.PolicyRegistrant.MetricsAccessPolicy); - }); if (IsProfilingAllowed(env, configuration)) { app.UseMiniProfiler(); @@ -638,7 +634,10 @@ public override void OnApplicationInitialization(ApplicationInitializationContex }); app.UseAuditing(); app.UseAbpSerilogEnrichers(); - app.UseConfiguredEndpoints(); + app.UseConfiguredEndpoints(endpoints => + { + endpoints.MapMetrics().RequireAuthorization(Unity.GrantManager.Web.Identity.Policy.PolicyRegistrant.MetricsAccessPolicy); + }); var supportedCultures = new[] { From 9fabdf2f04f630d7dd9f496766dbe86c7b3be639 Mon Sep 17 00:00:00 2001 From: James Pasta <129337673+JamesPasta@users.noreply.github.com> Date: Tue, 12 May 2026 09:50:20 -0700 Subject: [PATCH 10/13] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- .../GrantManagerWebModule.cs | 42 +++++++++++++++---- 1 file changed, 33 insertions(+), 9 deletions(-) diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs index 72736b4ccf..3a6f891dff 100644 --- 
a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/GrantManagerWebModule.cs @@ -151,20 +151,44 @@ public override void ConfigureServices(ServiceConfigurationContext context) ConfigureDataProtection(context, configuration); ConfigureMiniProfiler(context, configuration); - // Trust X-Forwarded-For only from internal RFC-1918 proxies (OpenShift HAProxy router). - // This ensures RemoteIpAddress reflects the real client IP so the - // InternalNetworkHandler correctly blocks external callers reaching /metrics via ingress. + // Trust forwarded client IP headers only from explicitly configured ingress/router addresses. + // This ensures RemoteIpAddress reflects the real client IP only when the request came + // through a known proxy, so IP-based checks such as the /metrics policy cannot be spoofed + // by arbitrary internal callers. + var knownForwardedHeaderProxies = configuration + .GetSection("ForwardedHeaders:KnownProxies") + .Get() ?? Array.Empty(); + var knownForwardedHeaderNetworks = configuration + .GetSection("ForwardedHeaders:KnownNetworks") + .Get() ?? Array.Empty(); + context.Services.Configure(options => { - options.ForwardedHeaders = ForwardedHeaders.XForwardedFor | ForwardedHeaders.XForwardedProto; + options.ForwardedHeaders = ForwardedHeaders.XForwardedProto; options.ForwardLimit = 1; - // Clear defaults and allow the three RFC-1918 blocks plus loopback. 
options.KnownProxies.Clear(); options.KnownIPNetworks.Clear(); - options.KnownIPNetworks.Add(System.Net.IPNetwork.Parse("127.0.0.0/8")); - options.KnownIPNetworks.Add(System.Net.IPNetwork.Parse("10.0.0.0/8")); - options.KnownIPNetworks.Add(System.Net.IPNetwork.Parse("172.16.0.0/12")); - options.KnownIPNetworks.Add(System.Net.IPNetwork.Parse("192.168.0.0/16")); + + foreach (var proxy in knownForwardedHeaderProxies) + { + if (!string.IsNullOrWhiteSpace(proxy)) + { + options.KnownProxies.Add(System.Net.IPAddress.Parse(proxy)); + } + } + + foreach (var network in knownForwardedHeaderNetworks) + { + if (!string.IsNullOrWhiteSpace(network)) + { + options.KnownIPNetworks.Add(System.Net.IPNetwork.Parse(network)); + } + } + + if (options.KnownProxies.Count > 0 || options.KnownIPNetworks.Count > 0) + { + options.ForwardedHeaders |= ForwardedHeaders.XForwardedFor; + } }); Configure(options => From 95c94f018fba90a3c240e12e0e352874ea7bf0fd Mon Sep 17 00:00:00 2001 From: JamesPasta Date: Tue, 12 May 2026 10:08:11 -0700 Subject: [PATCH 11/13] feature/AB#32049-Prometheus --- .../Middleware/ExceptionCounterMiddleware.cs | 21 ++++++++++ .../ExceptionNotificationThrottle.cs | 39 ++++++++++--------- 2 files changed, 42 insertions(+), 18 deletions(-) diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionCounterMiddleware.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionCounterMiddleware.cs index e9a33146eb..73ab8b6b02 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionCounterMiddleware.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionCounterMiddleware.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using System.Reflection; using System.Threading.Tasks; using Microsoft.AspNetCore.Http; using Microsoft.Extensions.DependencyInjection; @@ -7,6 +8,7 @@ using Prometheus; using Unity.GrantManager.Notifications; using 
Unity.Notifications.TeamsNotifications; +using Volo.Abp.Uow; namespace Unity.GrantManager.Web.Middleware; @@ -28,6 +30,20 @@ public class ExceptionCounterMiddleware( LabelNames = ["type"] }); + // Git SHA baked in at build time via -p:SourceRevisionId=<sha> in the Dockerfile. + // Format is "<version>+<sha>", e.g. "1.0.0+a3f8c21"; we extract just the SHA. + private static readonly string CommitSha = ParseCommitSha( + typeof(ExceptionCounterMiddleware).Assembly + .GetCustomAttribute()? + .InformationalVersion); + + private static string ParseCommitSha(string? informationalVersion) + { + if (string.IsNullOrWhiteSpace(informationalVersion)) return "unknown"; + var plusIndex = informationalVersion.IndexOf('+'); + return plusIndex >= 0 ? informationalVersion[(plusIndex + 1)..] : informationalVersion; + } + public async Task InvokeAsync(HttpContext context) { try @@ -79,8 +95,11 @@ private void QueueTeamsNotification(HttpContext context, Exception ex) try { await using var scope = scopeFactory.CreateAsyncScope(); + var uowManager = scope.ServiceProvider.GetRequiredService(); var notifications = scope.ServiceProvider.GetRequiredService(); + using var uow = uowManager.Begin(requiresNew: true, isTransactional: false); + string activityTitle = $"[CRITICAL] {ex.GetType().Name}"; string activitySubtitle = $"Environment: {env} | {endpoint}"; @@ -90,6 +109,7 @@ private void QueueTeamsNotification(HttpContext context, Exception ex) new() { Name = "Message", Value = exMessage }, new() { Name = "Endpoint", Value = endpoint }, new() { Name = "Stack Trace", Value = stackTrace }, + new() { Name = "Commit", Value = CommitSha }, }; if (!string.IsNullOrEmpty(innerMessage)) @@ -98,6 +118,7 @@ private void QueueTeamsNotification(HttpContext context, Exception ex) } await notifications.PostToTeamsAsync(activityTitle, activitySubtitle, facts); + await uow.CompleteAsync(); } catch (Exception notifyEx) { diff --git
a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionNotificationThrottle.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionNotificationThrottle.cs index 1c6e1d2422..f0ab071469 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionNotificationThrottle.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionNotificationThrottle.cs @@ -17,6 +17,9 @@ public sealed class ExceptionNotificationThrottle private const int GlobalMaxPerMinute = 5; private readonly ConcurrentDictionary _lastSent = new(); + + // _sentThisMinute and _windowStart are always accessed together under _lock + private readonly object _lock = new(); private int _sentThisMinute; private DateTimeOffset _windowStart = DateTimeOffset.UtcNow; @@ -26,33 +29,33 @@ public sealed class ExceptionNotificationThrottle /// public bool ShouldNotify(string exceptionTypeName) { - ResetWindowIfNeeded(); - - if (_sentThisMinute >= GlobalMaxPerMinute) - { - return false; - } - var now = DateTimeOffset.UtcNow; + // Per-type cooldown check — ConcurrentDictionary read is lock-free if (_lastSent.TryGetValue(exceptionTypeName, out var last) && now - last < PerTypeCooldown) { return false; } - _lastSent[exceptionTypeName] = now; - Interlocked.Increment(ref _sentThisMinute); - return true; - } - - private void ResetWindowIfNeeded() - { - var now = DateTimeOffset.UtcNow; - if (now - _windowStart >= TimeSpan.FromMinutes(1)) + lock (_lock) { - Interlocked.Exchange(ref _sentThisMinute, 0); - _windowStart = now; + // Reset the window if a full minute has elapsed + if (now - _windowStart >= TimeSpan.FromMinutes(1)) + { + _sentThisMinute = 0; + _windowStart = now; + } + + if (_sentThisMinute >= GlobalMaxPerMinute) + { + return false; + } + + _sentThisMinute++; } + + _lastSent[exceptionTypeName] = now; + return true; } } From 8cb15ad6249073b89e2ab79cfd2aed38429f104a Mon Sep 17 00:00:00 2001 From: 
JamesPasta Date: Tue, 12 May 2026 10:11:46 -0700 Subject: [PATCH 12/13] feature/AB#32049-Prometheus --- .../Middleware/ExceptionCounterMiddleware.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionCounterMiddleware.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionCounterMiddleware.cs index 73ab8b6b02..1e96b723d0 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionCounterMiddleware.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionCounterMiddleware.cs @@ -19,7 +19,7 @@ public class ExceptionCounterMiddleware( { // Notify only in these environments; add "Staging" if desired private static readonly HashSet NotifyEnvironments = - new(StringComparer.OrdinalIgnoreCase) { "Production" }; + new(StringComparer.OrdinalIgnoreCase) { "Production", "Test", "Development" }; private static readonly Counter ExceptionCounter = Metrics.CreateCounter( From 9802719d7138b847ac666cf1f4de1af8fcd78881 Mon Sep 17 00:00:00 2001 From: JamesPasta Date: Tue, 12 May 2026 12:01:41 -0700 Subject: [PATCH 13/13] feature/AB#32049-Prometheus --- .../ExceptionNotificationThrottle.cs | 30 +++++++++---------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionNotificationThrottle.cs b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionNotificationThrottle.cs index f0ab071469..c41dc6f91d 100644 --- a/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionNotificationThrottle.cs +++ b/applications/Unity.GrantManager/src/Unity.GrantManager.Web/Middleware/ExceptionNotificationThrottle.cs @@ -1,6 +1,5 @@ using System; -using System.Collections.Concurrent; -using System.Threading; +using System.Collections.Generic; namespace Unity.GrantManager.Web.Middleware; @@ 
-16,10 +15,9 @@ public sealed class ExceptionNotificationThrottle // Global cap: at most N notifications per rolling minute across all types private const int GlobalMaxPerMinute = 5; - private readonly ConcurrentDictionary _lastSent = new(); - - // _sentThisMinute and _windowStart are always accessed together under _lock + // All state is accessed exclusively under _lock — no concurrent collections needed private readonly object _lock = new(); + private readonly Dictionary _lastSent = new(); private int _sentThisMinute; private DateTimeOffset _windowStart = DateTimeOffset.UtcNow; @@ -31,31 +29,31 @@ public bool ShouldNotify(string exceptionTypeName) { var now = DateTimeOffset.UtcNow; - // Per-type cooldown check — ConcurrentDictionary read is lock-free - if (_lastSent.TryGetValue(exceptionTypeName, out var last) && - now - last < PerTypeCooldown) - { - return false; - } - lock (_lock) { - // Reset the window if a full minute has elapsed + // Reset the global window if a full minute has elapsed if (now - _windowStart >= TimeSpan.FromMinutes(1)) { _sentThisMinute = 0; _windowStart = now; } + // Per-type cooldown check — inside the lock to prevent concurrent + // callers with the same exception type both passing the check + if (_lastSent.TryGetValue(exceptionTypeName, out var last) && + now - last < PerTypeCooldown) + { + return false; + } + if (_sentThisMinute >= GlobalMaxPerMinute) { return false; } _sentThisMinute++; + _lastSent[exceptionTypeName] = now; + return true; } - - _lastSent[exceptionTypeName] = now; - return true; } }