-
Notifications
You must be signed in to change notification settings - Fork 137
Expand file tree
/
Copy pathtest.sh
More file actions
44 lines (39 loc) · 1.29 KB
/
test.sh
File metadata and controls
44 lines (39 loc) · 1.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/bin/bash
# Print how many whitespace-separated ids the list "$1" contains.
# Arguments: $1 - GPU ids separated by blanks/newlines (may be empty)
# Outputs:   the id count on stdout (0 for an empty or blank list)
count_gpu_ids() {
    local -a ids=()
    # -d '' lets read consume the whole string instead of just its first line;
    # read then reports end-of-input with a non-zero status, hence "|| true".
    read -r -d '' -a ids <<<"$1" || true
    printf '%s\n' "${#ids[@]}"
}

# Numeric ids taken from the first column of `rocm-smi --showid`, with
# adjacent repeats collapsed, joined into one blank-separated line.
gpus=$(rocm-smi --showid | awk '{print $1}' | grep -Eo '[0-9]+' | uniq | tr '\n' ' ')

# Count ids, not characters: counting the digits left after stripping
# whitespace over-reports as soon as any id has two digits (e.g. 10, 11).
ngpus=$(count_gpu_ids "$gpus")

# Device flags handed to ./mfc.sh. job_device and job_interface are not
# assigned in this script; they are expected to be provided by the caller.
# The value is expanded unquoted further down so that it splits into words.
if [ "$job_device" = "gpu" ]; then
    device_opts="--gpu"
    if [ "$job_interface" = "acc" ]; then
        device_opts+=" acc"
    elif [ "$job_interface" = "omp" ]; then
        device_opts+=" mp"
    fi
else
    device_opts="--no-gpu"
fi
# Build source code on compute node (deps already fetched on login node).
# The build is driven through a dry run of the test command. A failed
# attempt cleans the source targets and waits 30s before the next one;
# the script exits with status 1 once the last attempt has failed.
max_attempts=3
for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    echo "Build attempt $attempt of $max_attempts..."
    # $device_opts is left unquoted on purpose so it splits into separate flags.
    if ./mfc.sh test -v -a --dry-run --rdma-mpi -j 8 $device_opts; then
        echo "Build succeeded on attempt $attempt."
        break
    fi

    # Out of attempts: give up instead of cleaning and sleeping again.
    if ((attempt == max_attempts)); then
        echo "Build failed after $max_attempts attempts."
        exit 1
    fi

    echo "Build failed on attempt $attempt. Cleaning source targets and retrying in 30s..."
    ./mfc.sh clean -t pre_process simulation post_process syscheck
    sleep 30
done
# Run tests
# Collect the device-specific arguments first, then call ./mfc.sh once.
test_args=(-v -a)
if [ "$job_device" = "gpu" ]; then
    # GPU job: one parallel job per detected GPU. $ngpus and $device_opts are
    # expanded unquoted on purpose so they word-split into separate arguments.
    # shellcheck disable=SC2206
    test_args+=(--rdma-mpi --max-attempts 3 -j $ngpus $device_opts)
else
    # Any other job: fixed job count of 32 with GPU support switched off.
    test_args+=(--max-attempts 3 -j 32 --no-gpu)
fi
./mfc.sh test "${test_args[@]}" -- -c frontier