@@ -14,6 +14,7 @@ import (
1414 "os/exec"
1515 "path/filepath"
1616 "reflect"
17+ "strconv"
1718 "strings"
1819 "time"
1920
@@ -88,6 +89,13 @@ func RegisterSteps(sc *godog.ScenarioContext) {
8889 sc .Step (`^(?i)resource apply fails with error msg containing "([^"]+)"$` , ResourceApplyFails )
8990 sc .Step (`^(?i)resource "([^"]+)" is eventually restored$` , ResourceRestored )
9091 sc .Step (`^(?i)resource "([^"]+)" matches$` , ResourceMatches )
92+ sc .Step (`^(?i)user performs rollout restart on "([^"]+)"$` , UserPerformsRolloutRestart )
93+ sc .Step (`^(?i)deployment "([^"]+)" has restart annotation$` , ResourceHasRestartAnnotation )
94+ sc .Step (`^(?i)deployment "([^"]+)" is ready$` , DeploymentIsReady )
95+ sc .Step (`^(?i)deployment "([^"]+)" rollout completes successfully$` , DeploymentRolloutCompletesSuccessfully )
96+ sc .Step (`^(?i)I wait for "([^"]+)" seconds$` , WaitForSeconds )
97+ sc .Step (`^(?i)deployment "([^"]+)" rollout is still successful$` , DeploymentRolloutIsStillSuccessful )
98+ sc .Step (`^(?i)deployment "([^"]+)" has expected number of ready replicas$` , DeploymentHasExpectedReadyReplicas )
9199
92100 sc .Step (`^(?i)ServiceAccount "([^"]*)" with needed permissions is available in test namespace$` , ServiceAccountWithNeededPermissionsIsAvailableInNamespace )
93101 sc .Step (`^(?i)ServiceAccount "([^"]*)" with needed permissions is available in \${TEST_NAMESPACE}$` , ServiceAccountWithNeededPermissionsIsAvailableInNamespace )
@@ -1288,3 +1296,251 @@ func latestActiveRevisionForExtension(extName string) (*ocv1.ClusterExtensionRev
12881296
12891297 return latest , nil
12901298}
1299+
1300+ // UserPerformsRolloutRestart simulates a user running "kubectl rollout restart deployment/<name>".
1301+ // This adds a restart annotation to trigger a rolling restart of pods.
1302+ // This is used to test the generic fix - OLM should not undo ANY user-added annotations.
1303+ // In OLMv0, OLM would undo this change. In OLMv1, it should stay because kubectl owns it.
1304+ // See: https://github.com/operator-framework/operator-lifecycle-manager/issues/3392
1305+ func UserPerformsRolloutRestart (ctx context.Context , resourceName string ) error {
1306+ sc := scenarioCtx (ctx )
1307+ resourceName = substituteScenarioVars (resourceName , sc )
1308+
1309+ kind , deploymentName , ok := strings .Cut (resourceName , "/" )
1310+ if ! ok {
1311+ return fmt .Errorf ("invalid resource name format: %s (expected kind/name)" , resourceName )
1312+ }
1313+
1314+ if kind != "deployment" {
1315+ return fmt .Errorf ("only deployment resources are supported for restart annotation, got: %s" , kind )
1316+ }
1317+
1318+ // Run kubectl rollout restart to add the restart annotation.
1319+ // This is the real command users run, so we test actual user behavior.
1320+ out , err := k8sClient ("rollout" , "restart" , resourceName , "-n" , sc .namespace )
1321+ if err != nil {
1322+ return fmt .Errorf ("failed to rollout restart %s: %w" , resourceName , err )
1323+ }
1324+
1325+ logger .V (1 ).Info ("Rollout restart initiated" , "deployment" , deploymentName , "output" , out )
1326+
1327+ return nil
1328+ }
1329+
// ResourceHasRestartAnnotation checks that a deployment has a restart annotation.
// This confirms that user changes stay in place after OLM runs.
//
// NOTE(review): the outcome of waitFor is not observed here, so if the annotation
// never appears this step still returns nil and logs an empty value — confirm
// that waitFor itself fails the scenario on timeout, otherwise this check can
// pass vacuously.
func ResourceHasRestartAnnotation(ctx context.Context, deploymentName string) error {
	sc := scenarioCtx(ctx)
	deploymentName = substituteScenarioVars(deploymentName, sc)

	// kubectl rollout restart stamps the pod template with this annotation key.
	restartAnnotationKey := "kubectl.kubernetes.io/restartedAt"

	var annotationValue string
	waitFor(ctx, func() bool {
		// Bracket notation is used because the annotation key contains dots.
		// NOTE(review): kubectl's jsonpath implementation may require dots to be
		// escaped (\.) even inside quoted brackets — verify this query actually
		// returns the value on the target kubectl version.
		out, err := k8sClient("get", "deployment", deploymentName, "-n", sc.namespace,
			"-o", fmt.Sprintf("jsonpath={.spec.template.metadata.annotations['%s']}", restartAnnotationKey))
		if err != nil {
			// Transient kubectl failure: keep polling.
			return false
		}
		// If the annotation exists and has a value, it stayed in place
		if out == "" {
			return false
		}
		annotationValue = out
		return true
	})

	logger.V(1).Info("Restart annotation found", "deployment", deploymentName, "restartedAt", annotationValue)
	return nil
}
1356+
1357+ // DeploymentIsReady checks that a deployment is ready with all pods running.
1358+ func DeploymentIsReady (ctx context.Context , deploymentName string ) error {
1359+ sc := scenarioCtx (ctx )
1360+ deploymentName = substituteScenarioVars (deploymentName , sc )
1361+
1362+ waitFor (ctx , func () bool {
1363+ // Check if deployment is ready
1364+ out , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , sc .namespace ,
1365+ "-o" , "jsonpath={.status.conditions[?(@.type=='Available')].status}" )
1366+ if err != nil {
1367+ return false
1368+ }
1369+ return out == "True"
1370+ })
1371+
1372+ logger .V (1 ).Info ("Deployment is ready" , "deployment" , deploymentName )
1373+ return nil
1374+ }
1375+
1376+ // DeploymentRolloutCompletesSuccessfully waits for the rollout to finish.
1377+ // This checks that new pods were created and are running.
1378+ func DeploymentRolloutCompletesSuccessfully (ctx context.Context , deploymentName string ) error {
1379+ sc := scenarioCtx (ctx )
1380+ deploymentName = substituteScenarioVars (deploymentName , sc )
1381+
1382+ timeoutArg := fmt .Sprintf ("--timeout=%s" , timeout )
1383+ out , err := k8sClient ("rollout" , "status" , "deployment/" + deploymentName , "-n" , sc .namespace , timeoutArg )
1384+ if err != nil {
1385+ return fmt .Errorf ("deployment rollout failed: %w, output: %s" , err , out )
1386+ }
1387+
1388+ logger .V (1 ).Info ("Deployment rollout completed" , "deployment" , deploymentName , "status" , out )
1389+
1390+ // Check deployment status
1391+ available , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , sc .namespace ,
1392+ "-o" , "jsonpath={.status.conditions[?(@.type=='Available')].status}" )
1393+ if err != nil {
1394+ return fmt .Errorf ("failed to check deployment availability: %w" , err )
1395+ }
1396+ if available != "True" {
1397+ return fmt .Errorf ("deployment %s is not available" , deploymentName )
1398+ }
1399+
1400+ progressing , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , sc .namespace ,
1401+ "-o" , "jsonpath={.status.conditions[?(@.type=='Progressing')].status}" )
1402+ if err != nil {
1403+ return fmt .Errorf ("failed to check deployment progressing: %w" , err )
1404+ }
1405+ if progressing != "True" {
1406+ return fmt .Errorf ("deployment %s is not progressing correctly" , deploymentName )
1407+ }
1408+
1409+ return nil
1410+ }
1411+
1412+ // WaitForSeconds waits for the given number of seconds.
1413+ // This gives OLM time to run its checks between test steps.
1414+ //
1415+ // Note: We wait for a fixed time (not checking in a loop) because we need to make sure
1416+ // OLM actually runs (it runs every 10 seconds). We want to check that user changes stay
1417+ // AFTER OLM runs. If we just checked in a loop, we wouldn't know if OLM ran yet.
1418+ func WaitForSeconds (ctx context.Context , seconds string ) error {
1419+ sec , err := strconv .Atoi (seconds )
1420+ if err != nil {
1421+ return fmt .Errorf ("invalid seconds value %s: %w" , seconds , err )
1422+ }
1423+
1424+ if sec <= 0 {
1425+ return fmt .Errorf ("seconds value must be greater than 0, got %d" , sec )
1426+ }
1427+
1428+ logger .V (1 ).Info ("Waiting for reconciliation" , "seconds" , sec )
1429+
1430+ timer := time .NewTimer (time .Duration (sec ) * time .Second )
1431+ defer timer .Stop ()
1432+
1433+ select {
1434+ case <- timer .C :
1435+ logger .V (1 ).Info ("Wait complete" )
1436+ return nil
1437+ case <- ctx .Done ():
1438+ return fmt .Errorf ("wait canceled: %w" , ctx .Err ())
1439+ }
1440+ }
1441+
1442+ // verifyDeploymentReplicaStatus checks if a deployment has the right number of ready pods.
1443+ func verifyDeploymentReplicaStatus (deploymentName , namespace string ) (string , string , error ) {
1444+ readyReplicas , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , namespace ,
1445+ "-o" , "jsonpath={.status.readyReplicas}" )
1446+ if err != nil {
1447+ return "" , "" , fmt .Errorf ("failed to get ready replicas: %w" , err )
1448+ }
1449+
1450+ replicas , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , namespace ,
1451+ "-o" , "jsonpath={.spec.replicas}" )
1452+ if err != nil {
1453+ return "" , "" , fmt .Errorf ("failed to get desired replicas: %w" , err )
1454+ }
1455+
1456+ // Normalize empty jsonpath results (when readyReplicas is omitted for 0 replicas)
1457+ readyReplicas = strings .TrimSpace (readyReplicas )
1458+ replicas = strings .TrimSpace (replicas )
1459+ if readyReplicas == "" {
1460+ readyReplicas = "0"
1461+ }
1462+ if replicas == "" {
1463+ replicas = "0"
1464+ }
1465+
1466+ // Compare as integers to avoid false negatives
1467+ readyInt , err := strconv .Atoi (readyReplicas )
1468+ if err != nil {
1469+ return readyReplicas , replicas , fmt .Errorf ("invalid ready replicas value %q: %w" , readyReplicas , err )
1470+ }
1471+ replicasInt , err := strconv .Atoi (replicas )
1472+ if err != nil {
1473+ return readyReplicas , replicas , fmt .Errorf ("invalid desired replicas value %q: %w" , replicas , err )
1474+ }
1475+
1476+ if readyInt != replicasInt {
1477+ return readyReplicas , replicas , fmt .Errorf ("deployment %s has %d ready replicas but expected %d" ,
1478+ deploymentName , readyInt , replicasInt )
1479+ }
1480+
1481+ return readyReplicas , replicas , nil
1482+ }
1483+
1484+ // DeploymentRolloutIsStillSuccessful checks that the rollout is still working.
1485+ // This makes sure OLM didn't undo the user's "kubectl rollout restart" command.
1486+ // It checks that new pods are still running.
1487+ func DeploymentRolloutIsStillSuccessful (ctx context.Context , deploymentName string ) error {
1488+ sc := scenarioCtx (ctx )
1489+ deploymentName = substituteScenarioVars (deploymentName , sc )
1490+
1491+ // Check deployment status
1492+ available , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , sc .namespace ,
1493+ "-o" , "jsonpath={.status.conditions[?(@.type=='Available')].status}" )
1494+ if err != nil {
1495+ return fmt .Errorf ("failed to check deployment availability: %w" , err )
1496+ }
1497+ if available != "True" {
1498+ return fmt .Errorf ("deployment %s is not available - rollout was undone" , deploymentName )
1499+ }
1500+
1501+ // Check that the deployment is still working correctly
1502+ progressing , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , sc .namespace ,
1503+ "-o" , "jsonpath={.status.conditions[?(@.type=='Progressing')].status}" )
1504+ if err != nil {
1505+ return fmt .Errorf ("failed to check deployment progressing: %w" , err )
1506+ }
1507+ if progressing != "True" {
1508+ return fmt .Errorf ("deployment %s is not working - rollout may have been undone" , deploymentName )
1509+ }
1510+
1511+ // Check that the right number of pods are ready (rollout finished and wasn't stopped)
1512+ readyReplicas , replicas , err := verifyDeploymentReplicaStatus (deploymentName , sc .namespace )
1513+ if err != nil {
1514+ return fmt .Errorf ("%w - rollout may have been undone" , err )
1515+ }
1516+
1517+ logger .V (1 ).Info ("Deployment rollout is still successful" , "deployment" , deploymentName ,
1518+ "readyReplicas" , readyReplicas , "desiredReplicas" , replicas )
1519+
1520+ return nil
1521+ }
1522+
1523+ // DeploymentHasExpectedReadyReplicas checks that the deployment has the right number of ready pods.
1524+ // This makes sure the rollout finished successfully and pods are running.
1525+ func DeploymentHasExpectedReadyReplicas (ctx context.Context , deploymentName string ) error {
1526+ sc := scenarioCtx (ctx )
1527+ deploymentName = substituteScenarioVars (deploymentName , sc )
1528+
1529+ // Check that the right number of pods are ready
1530+ readyReplicas , replicas , err := verifyDeploymentReplicaStatus (deploymentName , sc .namespace )
1531+ if err != nil {
1532+ return err
1533+ }
1534+
1535+ // Also check that there are no unavailable pods
1536+ unavailableReplicas , err := k8sClient ("get" , "deployment" , deploymentName , "-n" , sc .namespace ,
1537+ "-o" , "jsonpath={.status.unavailableReplicas}" )
1538+ if err == nil && unavailableReplicas != "" && unavailableReplicas != "0" {
1539+ return fmt .Errorf ("deployment %s has %s unavailable pods" , deploymentName , unavailableReplicas )
1540+ }
1541+
1542+ logger .V (1 ).Info ("Deployment has expected ready replicas" , "deployment" , deploymentName ,
1543+ "readyReplicas" , readyReplicas , "desiredReplicas" , replicas )
1544+
1545+ return nil
1546+ }
0 commit comments