Skip to content

Commit 2aadbe5

Browse files
fix(ci): improve CI infrastructure reliability
- Add timeout-minutes to all workflow jobs
- Add set -euo pipefail and quote variables in install-operator.sh

Signed-off-by: Carlos Eduardo Arango Gutierrez <eduardoa@nvidia.com>
1 parent 8df5c2a commit 2aadbe5

7 files changed

Lines changed: 23 additions & 3 deletions

File tree

.github/workflows/config-checks.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ on:
2121
jobs:
2222
helm-lint:
2323
runs-on: ubuntu-latest
24+
timeout-minutes: 10
2425
steps:
2526
- name: Checkout code
2627
uses: actions/checkout@v6
@@ -31,6 +32,7 @@ jobs:
3132

3233
validate-csv:
3334
runs-on: ubuntu-latest
35+
timeout-minutes: 15
3436
steps:
3537
- name: Checkout code
3638
uses: actions/checkout@v6
@@ -47,6 +49,7 @@ jobs:
4749

4850
validate-helm-values:
4951
runs-on: ubuntu-latest
52+
timeout-minutes: 15
5053
steps:
5154
- name: Checkout code
5255
uses: actions/checkout@v6

.github/workflows/e2e-tests.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ jobs:
6565
e2e-tests-containerd:
6666
needs: [variables]
6767
runs-on: linux-amd64-cpu4
68+
timeout-minutes: 90
6869
permissions:
6970
contents: read
7071
id-token: write
@@ -130,6 +131,7 @@ jobs:
130131
e2e-tests-nvidiadriver:
131132
needs: [variables]
132133
runs-on: linux-amd64-cpu4
134+
timeout-minutes: 90
133135
permissions:
134136
contents: read
135137
id-token: write

.github/workflows/forward-compatibility.yaml

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ permissions: {}
2828
jobs:
2929
fetch-latest-images:
3030
runs-on: ubuntu-latest
31+
timeout-minutes: 30
3132
permissions:
3233
contents: read
3334
steps:
@@ -77,11 +78,15 @@ jobs:
7778

7879
notify-failure:
7980
runs-on: ubuntu-latest
81+
timeout-minutes: 10
8082
permissions: {}
8183
needs: [fetch-latest-images, run-e2e-tests]
82-
if: ${{ always() && (needs.fetch-latest-images.result == 'failure' || needs.run-e2e-tests.result == 'failure') && secrets.SLACK_BOT_TOKEN != '' }}
84+
if: ${{ always() && (needs.fetch-latest-images.result == 'failure' || needs.run-e2e-tests.result == 'failure') }}
85+
env:
86+
SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
8387
steps:
8488
- name: Send Slack alert notification
89+
if: ${{ env.SLACK_BOT_TOKEN != '' }}
8590
uses: slackapi/slack-github-action@v2.1.1
8691
with:
8792
method: chat.postMessage

.github/workflows/golang-checks.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ on:
2121
jobs:
2222
go-check:
2323
runs-on: linux-amd64-cpu4
24+
timeout-minutes: 30
2425
permissions:
2526
contents: read
2627
id-token: write
@@ -55,6 +56,7 @@ jobs:
5556
go-test:
5657
name: unit tests
5758
runs-on: linux-amd64-cpu4
59+
timeout-minutes: 30
5860
permissions:
5961
contents: read
6062
id-token: write
@@ -80,6 +82,7 @@ jobs:
8082
8183
go-build:
8284
runs-on: linux-amd64-cpu4
85+
timeout-minutes: 30
8386
permissions:
8487
contents: read
8588
id-token: write

.github/workflows/image-builds.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ on:
3333
jobs:
3434
variables:
3535
runs-on: ubuntu-latest
36+
timeout-minutes: 5
3637
outputs:
3738
commit_short_sha: ${{ steps.vars.outputs.commit_short_sha }}
3839
label_image_source: ${{ steps.vars.outputs.label_image_source }}
@@ -77,6 +78,7 @@ jobs:
7778
build-gpu-operator-arm64:
7879
needs: [variables]
7980
runs-on: linux-arm64-cpu4
81+
timeout-minutes: 45
8082
permissions:
8183
contents: read
8284
id-token: write
@@ -108,6 +110,7 @@ jobs:
108110
build-gpu-operator-amd64:
109111
needs: [variables]
110112
runs-on: linux-amd64-cpu4
113+
timeout-minutes: 45
111114
permissions:
112115
contents: read
113116
id-token: write
@@ -139,6 +142,7 @@ jobs:
139142
build-multi-arch-images:
140143
needs: [variables, build-gpu-operator-arm64, build-gpu-operator-amd64]
141144
runs-on: ubuntu-latest
145+
timeout-minutes: 15
142146
steps:
143147
- uses: actions/checkout@v6
144148
name: Check out code

.github/workflows/release.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ permissions: {}
2929
jobs:
3030
variables:
3131
runs-on: ubuntu-latest
32+
timeout-minutes: 5
3233
outputs:
3334
commit_short_sha: ${{ steps.vars.outputs.commit_short_sha }}
3435
operator_image_base: ${{ steps.vars.outputs.operator_image_base }}
@@ -60,6 +61,7 @@ jobs:
6061
release-latest-gpu-operator-image:
6162
needs: [variables]
6263
runs-on: linux-amd64-cpu4
64+
timeout-minutes: 30
6365
permissions:
6466
contents: read
6567
packages: write

tests/scripts/install-operator.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
#!/bin/bash
2+
set -euo pipefail
23

34
if [[ "${SKIP_INSTALL}" == "true" ]]; then
45
echo "Skipping install: SKIP_INSTALL=${SKIP_INSTALL}"
56
exit 0
67
fi
78

89
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
9-
source ${SCRIPT_DIR}/.definitions.sh
10+
source "${SCRIPT_DIR}/.definitions.sh"
1011

11-
OPERATOR_REPOSITORY=$(dirname ${OPERATOR_IMAGE})
12+
OPERATOR_REPOSITORY=$(dirname "${OPERATOR_IMAGE}")
1213

1314
# Determine if we should use values file approach or --set flags
1415
USE_VALUES_FILE=false

0 commit comments

Comments
 (0)