forked from related-sciences/gce-github-runner
-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathaction.sh
More file actions
executable file
·362 lines (335 loc) · 11.3 KB
/
action.sh
File metadata and controls
executable file
·362 lines (335 loc) · 11.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
#!/usr/bin/env bash
# Absolute path of the directory that contains this script; used to locate
# the vendored helper files. Both command substitutions are quoted so the
# lookup keeps working when the checkout path contains spaces or glob
# characters.
ACTION_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
# Prints a one-line usage summary to stdout.
# Only `start` is listed because it is the only command the dispatcher at
# the bottom of this script accepts.
function usage {
  echo "Usage: ${0} --command=start <arguments>"
}
# Turns on the strict shell options used while the action runs:
#   -e errexit, -C noclobber, -u nounset, plus pipefail.
function safety_on {
  set -eCu -o pipefail
}
# Turns off the strict shell options enabled by safety_on:
#   +e errexit, +C noclobber, +u nounset, plus pipefail.
function safety_off {
  set +eCu +o pipefail
}
# Long-option parser. getopts_long is not defined in this file, so it is
# presumably provided by this vendored script (together with OPTLIND,
# OPTLARG and OPTLERR used below).
# shellcheck source=vendor/getopts_long.sh
source "${ACTION_DIR}/vendor/getopts_long.sh"

# Option values. Everything starts out empty so later code can test with
# `-z` / `-n` even when `nounset` is enabled.
command=
token=
project_id=
service_account_key=
runner_ver=
machine_zone=
machine_type=
boot_disk_type=
disk_size=
runner_service_account=
image_project=
image=
image_family=
network=
scopes=
shutdown_timeout=
subnet=
preemptible=
ephemeral=
no_external_address=
actions_preinstalled=
maintenance_policy_terminate=
arm=
accelerator=
num_instances=
labels=
max_run_duration=

# Parse `--name=value` style options. The leading ':' in the short-option
# string selects silent error reporting; errors arrive as opt=':' with the
# message in OPTLERR (handled in the last case arm).
OPTLIND=1
while getopts_long :h opt \
  command required_argument \
  token required_argument \
  project_id required_argument \
  service_account_key required_argument \
  runner_ver required_argument \
  machine_zone required_argument \
  machine_type required_argument \
  boot_disk_type optional_argument \
  disk_size optional_argument \
  runner_service_account optional_argument \
  image_project optional_argument \
  image optional_argument \
  image_family optional_argument \
  labels optional_argument \
  max_run_duration optional_argument \
  network optional_argument \
  scopes required_argument \
  shutdown_timeout required_argument \
  subnet optional_argument \
  preemptible required_argument \
  ephemeral required_argument \
  no_external_address required_argument \
  actions_preinstalled required_argument \
  arm required_argument \
  maintenance_policy_terminate optional_argument \
  accelerator optional_argument \
  num_instances required_argument \
  help no_argument "" "$@"
do
  case "$opt" in
    # --- options declared with required_argument ---
    command)
      command=$OPTLARG
      ;;
    token)
      token=$OPTLARG
      ;;
    project_id)
      project_id=$OPTLARG
      ;;
    service_account_key)
      service_account_key="$OPTLARG"
      ;;
    runner_ver)
      runner_ver=$OPTLARG
      ;;
    machine_zone)
      machine_zone=$OPTLARG
      ;;
    machine_type)
      machine_type=$OPTLARG
      ;;
    scopes)
      scopes=$OPTLARG
      ;;
    shutdown_timeout)
      shutdown_timeout=$OPTLARG
      ;;
    preemptible)
      preemptible=$OPTLARG
      ;;
    ephemeral)
      ephemeral=$OPTLARG
      ;;
    no_external_address)
      no_external_address=$OPTLARG
      ;;
    actions_preinstalled)
      actions_preinstalled=$OPTLARG
      ;;
    arm)
      arm=$OPTLARG
      ;;
    num_instances)
      num_instances=$OPTLARG
      ;;
    # --- options declared with optional_argument ---
    # `${OPTLARG-$var}` keeps the current value when OPTLARG is unset
    # (presumably the case when the option is given without a value —
    # confirm against getopts_long). All optional options now use the same
    # form; labels, max_run_duration and accelerator previously assigned
    # $OPTLARG directly.
    boot_disk_type)
      boot_disk_type=${OPTLARG-$boot_disk_type}
      ;;
    disk_size)
      disk_size=${OPTLARG-$disk_size}
      ;;
    runner_service_account)
      runner_service_account=${OPTLARG-$runner_service_account}
      ;;
    image_project)
      image_project=${OPTLARG-$image_project}
      ;;
    image)
      image=${OPTLARG-$image}
      ;;
    image_family)
      image_family=${OPTLARG-$image_family}
      ;;
    labels)
      labels=${OPTLARG-$labels}
      ;;
    max_run_duration)
      max_run_duration=${OPTLARG-$max_run_duration}
      ;;
    network)
      network=${OPTLARG-$network}
      ;;
    subnet)
      subnet=${OPTLARG-$subnet}
      ;;
    maintenance_policy_terminate)
      maintenance_policy_terminate=${OPTLARG-$maintenance_policy_terminate}
      ;;
    accelerator)
      accelerator=${OPTLARG-$accelerator}
      ;;
    # --- help and parse errors ---
    h|help)
      usage
      exit 0
      ;;
    :)
      printf >&2 '%s: %s\n' "${0##*/}" "$OPTLERR"
      usage
      exit 1
      ;;
  esac
done
#######################################
# Activates the service account in gcloud by piping the key to
# `gcloud auth activate-service-account --key-file -`.
# Globals:   service_account_key (read), project_id (read)
# Outputs:   success message on stdout, failure message on stderr
# Returns:   0 on success, 1 if gcloud fails
#######################################
function gcloud_auth {
  # NOTE: when --project is specified, it updates the config
  # The key is written with printf and quoted so its whitespace and any
  # glob characters reach gcloud verbatim (an unquoted echo would word-split
  # and glob-expand the JSON). gcloud's own output is discarded, so failure
  # is reported explicitly instead of relying on errexit alone.
  if ! printf '%s\n' "${service_account_key}" \
    | gcloud --project "${project_id}" --quiet auth activate-service-account --key-file - &>/dev/null; then
    echo "❌ Failed to configure gcloud." >&2
    return 1
  fi
  echo "✅ Successfully configured gcloud."
}
#######################################
# Creates the runner VM(s) with `gcloud compute instances bulk create` and
# waits until each one has labelled itself gh_ready=1.
#
# Steps:
#   1. Authenticate gcloud when both service_account_key and project_id
#      are provided.
#   2. Request a runner registration token for GITHUB_REPOSITORY.
#   3. Build the VM startup script (optionally downloading the runner).
#   4. Bulk-create num_instances VMs named "${VM_ID}-#".
#   5. Poll every instance (60 polls, 5 s apart); if any instance never
#      becomes ready, delete all launched instances and exit 1.
#
# Globals (read): the option variables set by the argument parser, plus
#   GITHUB_REPOSITORY, GITHUB_RUN_ID, GITHUB_RUN_ATTEMPT, GITHUB_OUTPUT.
# Outputs: progress messages on stdout; appends "label=<VM_ID>" to the file
#   named by GITHUB_OUTPUT.
# Returns: 0 when every instance is ready; exits the script with 1 on
#   failure.
#######################################
function start_vm {
  # Polls per instance; with the 5 second sleep this is the "5 minutes"
  # mentioned in the timeout message below.
  local -r max_ready_checks=60
  local RUNNER_TOKEN VM_ID ephemeral_flag startup_script latest_ver runner_arch
  local launched_instances instance extra_instance GH_READY attempt
  local -a create_args

  echo "Starting GCE VM ..."
  if [[ -z "${service_account_key}" ]] || [[ -z "${project_id}" ]]; then
    echo "Won't authenticate gcloud. If you wish to authenticate gcloud provide both service_account_key and project_id."
  else
    echo "Will authenticate gcloud."
    gcloud_auth
  fi

  # curl is run without --fail, so an API error still yields a JSON body and
  # jq prints "null"; treat that (or an empty result) as a failure instead
  # of launching VMs that can never register.
  RUNNER_TOKEN=$(curl -S -s -XPOST \
    -H "authorization: Bearer ${token}" \
    "https://api.github.com/repos/${GITHUB_REPOSITORY}/actions/runners/registration-token" \
    | jq -r .token) || RUNNER_TOKEN=""
  if [[ -z "${RUNNER_TOKEN}" || "${RUNNER_TOKEN}" == "null" ]]; then
    echo "❌ Failed to get the GitHub Runner registration token" >&2
    exit 1
  fi
  echo "✅ Successfully got the GitHub Runner registration token"

  VM_ID="gce-gh-runner-${GITHUB_RUN_ID}-${GITHUB_RUN_ATTEMPT}"

  # Interpolated into the runner's config.sh call inside the startup script.
  ephemeral_flag=""
  if [[ "${ephemeral}" == "true" ]]; then
    ephemeral_flag="--ephemeral"
  fi

  echo "The new GCE VM will be ${VM_ID}"

  # The text below runs on the VM. It is kept at column 0 so the embedded
  # heredoc terminators (EOF) are recognised there.
  # NOTE(review): the script is passed via --metadata=startup-script=...,
  # and gcloud documents --metadata as comma-separated KEY=VALUE pairs, so
  # presumably a comma inside this text would break the call — confirm
  # before adding one.
  # The quotes on the `.env` line are escaped so they no longer terminate
  # the surrounding string; the command executed on the VM is equivalent.
  startup_script="
# Create a systemd service in charge of shutting down the machine once the workflow has finished
cat <<-EOF > /etc/systemd/system/shutdown.sh
#!/bin/sh
sleep ${shutdown_timeout}
# ensure the active gcloud account in the shutdown script is the same one configured on instance creation
gcloud config set account \$(gcloud auth list --filter=status:ACTIVE --format=\"value(account)\")
instance=\$(hostname)
gcloud compute instances delete \\\${instance} --zone=$machine_zone --quiet
EOF
cat <<-EOF > /etc/systemd/system/shutdown.service
[Unit]
Description=Shutdown service
[Service]
ExecStart=/etc/systemd/system/shutdown.sh
[Install]
WantedBy=multi-user.target
EOF
chmod +x /etc/systemd/system/shutdown.sh
systemctl daemon-reload
systemctl enable shutdown.service
cat <<-EOF > /usr/bin/gce_runner_shutdown.sh
#!/bin/sh
instance=\$(hostname)
echo \"✅ Self deleting \\\${instance} in ${machine_zone} in ${shutdown_timeout} seconds ...\"
# We tear down the machine by starting the systemd service that was registered by the startup script
systemctl start shutdown.service
EOF
# See: https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/running-scripts-before-or-after-a-job
echo \"ACTIONS_RUNNER_HOOK_JOB_COMPLETED=/usr/bin/gce_runner_shutdown.sh\" >.env
instance=\$(hostname)
gcloud compute instances add-labels \${instance} --zone=${machine_zone} --labels=gh_ready=0 && \\
RUNNER_ALLOW_RUNASROOT=1 ./config.sh --url https://github.com/${GITHUB_REPOSITORY} --token ${RUNNER_TOKEN} --labels ${VM_ID} --unattended ${ephemeral_flag} --disableupdate && \\
./svc.sh install && \\
./svc.sh start && \\
gcloud compute instances add-labels \${instance} --zone=${machine_zone} --labels=gh_ready=1
# 3 days represents the max workflow runtime. This will shutdown the instance if everything else fails.
nohup sh -c \"sleep 3d && gcloud --quiet compute instances delete \${instance} --zone=${machine_zone}\" > /dev/null &
"

  # Compare against the literal "true" (as preemptible/ephemeral do) rather
  # than executing the option value as a command, which also treated an
  # empty value as true.
  if [[ "${actions_preinstalled}" == "true" ]]; then
    echo "✅ Startup script won't install GitHub Actions (pre-installed)"
    startup_script="#!/bin/bash
cd /actions-runner
$startup_script"
  else
    if [[ "$runner_ver" = "latest" ]]; then
      latest_ver=$(curl -sL --fail -H "Authorization: Bearer ${token}" \
        https://api.github.com/repos/actions/runner/releases/latest \
        | jq -r '.tag_name' | sed -e 's/^v//') || {
        echo "❌ Failed to fetch latest runner version"
        exit 1
      }
      runner_ver="$latest_ver"
      echo "✅ runner_ver=latest is specified. v$latest_ver is detected as the latest version."
    fi
    echo "✅ Startup script will install GitHub Actions v$runner_ver"
    # The arm64 and x64 install preambles differ only in the archive name.
    runner_arch="x64"
    if [[ "${arm}" == "true" ]]; then
      runner_arch="arm64"
    fi
    startup_script="#!/bin/bash
mkdir /actions-runner
cd /actions-runner
curl -o actions-runner-linux-${runner_arch}-${runner_ver}.tar.gz -L https://github.com/actions/runner/releases/download/v${runner_ver}/actions-runner-linux-${runner_arch}-${runner_ver}.tar.gz
tar xzf ./actions-runner-linux-${runner_arch}-${runner_ver}.tar.gz
./bin/installdependencies.sh
$startup_script"
  fi

  # Build the argument list as an array: optional flags are simply left out
  # and every value is passed as exactly one word (no word-splitting or
  # globbing of option values).
  # NOTE(review): the `network` option is parsed, but the original command
  # never forwarded a --network flag to gcloud; that is left unchanged here
  # — confirm whether it should be passed.
  create_args=(
    "--name-pattern=${VM_ID}-#"
    "--count=${num_instances}"
    "--min-count=${num_instances}"
    "--zone=${machine_zone}"
  )
  if [[ -n "${disk_size}" ]]; then
    create_args+=("--boot-disk-size=${disk_size}")
  fi
  if [[ -n "${boot_disk_type}" ]]; then
    create_args+=("--boot-disk-type=${boot_disk_type}")
  fi
  create_args+=("--machine-type=${machine_type}" "--scopes=${scopes}")
  if [[ -n "${runner_service_account}" ]]; then
    create_args+=("--service-account=${runner_service_account}")
  fi
  if [[ -n "${image_project}" ]]; then
    create_args+=("--image-project=${image_project}")
  fi
  if [[ -n "${image}" ]]; then
    create_args+=("--image=${image}")
  fi
  if [[ -n "${image_family}" ]]; then
    create_args+=("--image-family=${image_family}")
  fi
  if [[ "${preemptible}" == "true" ]]; then
    create_args+=("--preemptible")
  fi
  if [[ "${no_external_address}" == "true" ]]; then
    create_args+=("--no-address")
  fi
  if [[ -n "${subnet}" ]]; then
    create_args+=("--subnet=${subnet}")
  fi
  if [[ -n "${accelerator}" ]]; then
    create_args+=("--accelerator=${accelerator}")
  fi
  # An accelerator always implies TERMINATE; emit the flag once even when
  # maintenance_policy_terminate is also set.
  # NOTE(review): any non-empty maintenance_policy_terminate value (even
  # "false") enables TERMINATE, as before — confirm that is intended.
  if [[ -n "${accelerator}" || -n "${maintenance_policy_terminate}" ]]; then
    create_args+=("--maintenance-policy=TERMINATE")
  fi
  if [[ -n "${labels}" ]]; then
    create_args+=("--labels=gh_ready=0,vm_id=${VM_ID},${labels}")
  else
    create_args+=("--labels=gh_ready=0,vm_id=${VM_ID}")
  fi
  if [[ -n "${max_run_duration}" ]]; then
    create_args+=(
      "--max-run-duration=${max_run_duration}"
      "--instance-termination-action=DELETE"
    )
  fi
  create_args+=("--metadata=startup-script=${startup_script}")

  gcloud compute instances bulk create "${create_args[@]}"

  echo "label=${VM_ID}" >> "${GITHUB_OUTPUT}"

  # From here on failures are handled explicitly (polling may legitimately
  # return non-zero), so strict mode is switched off.
  safety_off
  launched_instances=$(gcloud compute instances list --filter "labels.vm_id=${VM_ID}" --format='get(name)')
  if [[ -z "${launched_instances}" ]]; then
    echo "Failed to launch VMs"
    exit 1
  fi

  # Intentionally unquoted: the list holds one instance name per word.
  for instance in ${launched_instances}; do
    # Reset the readiness state and the poll counter for every instance.
    # Previously a single counter was shared, so later instances got fewer
    # polls (or none at all, inheriting a stale GH_READY=1).
    GH_READY=""
    for (( attempt = 0; attempt < max_ready_checks; attempt++ )); do
      GH_READY=$(gcloud compute instances describe "${instance}" --zone="${machine_zone}" --format='json(labels)' | jq -r .labels.gh_ready)
      if [[ "${GH_READY}" == 1 ]]; then
        break
      fi
      echo "${instance} not ready yet, waiting 5 secs ..."
      sleep 5
    done

    if [[ "${GH_READY}" == 1 ]]; then
      echo "✅ ${instance} ready ..."
    else
      echo "Waited 5 minutes for ${instance}, without luck, deleting ${instance} ..."
      gcloud --quiet compute instances delete "${instance}" --zone="${machine_zone}"
      # NOTE: if one instance fails and then we exit, we also need to clean up any other
      # launched instances
      for extra_instance in ${launched_instances}; do
        if [[ "${extra_instance}" != "${instance}" ]]; then
          echo "Deleting ${extra_instance} ..."
          gcloud --quiet compute instances delete "${extra_instance}" --zone="${machine_zone}"
        fi
      done
      exit 1
    fi
  done
}
# Entry point: switch on strict mode, reject anything other than the
# `start` command, then launch the VM(s).
safety_on
if [[ "${command}" != "start" ]]; then
  echo "Invalid command: \`${command}\`, valid values: start" >&2
  usage
  exit 1
fi
start_vm