From c43f2cd4adbde541de8a1ad8185849f773529b28 Mon Sep 17 00:00:00 2001 From: Hongkai Liu Date: Fri, 10 Apr 2026 18:25:29 -0400 Subject: [PATCH] OpenShiftUpdateRiskMightApply: bump pending to 15m from 10m This follows up on a recent finding [1]. Further digging shows that the risks related to our e2e tests are `TestAlertFeatureE2ETestOTA1813` and `SyntheticRiskA`. They are in the pending state. This pull request bumps the pending time so that the e2e tests have a longer window in which to finish. It could avoid disruption from production alerts. We could use `max by (namespace, risk, reason) (last_over_time(cluster_version_risk_conditions{job="cluster-version-operator", condition="Applies", risk!~"TestAlertFeatureE2ETest.*"}[5m]) != 0)` to ignore testing alerts, but it does not look good to have code handling special cases only for testing. This will recover the health of the TP-enabled jobs in CI. [1]. https://github.com/openshift/cluster-version-operator/pull/1367#issuecomment-4220247050 [2]. https://github.com/openshift/origin/pull/30929 --- ...ter-version-operator_02_prometheusrule_servicemonitor.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/install/0000_90_cluster-version-operator_02_prometheusrule_servicemonitor.yaml b/install/0000_90_cluster-version-operator_02_prometheusrule_servicemonitor.yaml index f4736ca85b..299912f740 100644 --- a/install/0000_90_cluster-version-operator_02_prometheusrule_servicemonitor.yaml +++ b/install/0000_90_cluster-version-operator_02_prometheusrule_servicemonitor.yaml @@ -16,11 +16,11 @@ spec: rules: - alert: OpenShiftUpdateRiskMightApply annotations: - summary: The cluster might have been exposed to the conditional update risk for 10 minutes. + summary: The cluster might have been exposed to the conditional update risk for 15 minutes. 
description: The conditional update risk {{ "{{ $labels.risk }}" }} might apply to the cluster because of {{ "{{ $labels.reason }}" }}, and the cluster update to a version exposed to the risk is not recommended. For more information refer to 'oc adm upgrade'. runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-version-operator/OpenShiftUpdateRiskMightApply.md expr: | max by (namespace, risk, reason) (last_over_time(cluster_version_risk_conditions{job="cluster-version-operator", condition="Applies"}[5m]) != 0) - for: 10m + for: 15m labels: severity: warning