Skip to content

Commit 2e61b3c

Browse files
core: Add CPU and I/O resource limits to run-service
Extend the service resource limitation system beyond memory to include:
- CPU limits via cgroups v2 cpu.max (percentage of cores)
- I/O bandwidth limits via cgroups v2 io.max (read/write MB/s)

Service tuple format updated to:
NAME,MEMORY_MB,CPU_PERCENT,IO_READ_MBPS,IO_WRITE_MBPS,COMMAND

Environment variables to disable limits:
- BLUEOS_DISABLE_RESOURCE_LIMITS: disables all limits
- BLUEOS_DISABLE_MEMORY_LIMIT: disables memory limit
- BLUEOS_DISABLE_CPU_LIMIT: disables CPU limit
- BLUEOS_DISABLE_IO_LIMIT: disables I/O limits
1 parent 9abca66 commit 2e61b3c

2 files changed

Lines changed: 146 additions & 45 deletions

File tree

core/run-service.sh

Lines changed: 82 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,11 @@
22

33
service_name=$1
44
service_command=$2
5-
memory_limit_mb=$3
5+
memory_limit_mb=${3:-0}
6+
cpu_limit_percent=${4:-0}
7+
io_read_mbps=${5:-0}
8+
io_write_mbps=${6:-0}
9+
610
memory_limit_bytes=$((memory_limit_mb * 1024 * 1024))
711
LOG_FILE="/var/logs/blueos/run-service.log"
812

@@ -11,8 +15,66 @@ CHILD_CGROUP="/sys/fs/cgroup/$DOCKER_CGROUP/$service_name"
1115
# Create a new cgroup for the service
1216
mkdir -p "$CHILD_CGROUP"
1317

14-
# Set memory limit for the cgroup
15-
echo "$memory_limit_bytes" > "$CHILD_CGROUP/memory.max"
18+
# Set memory limit for the cgroup (0 = no limit)
19+
if [ "$memory_limit_bytes" -gt 0 ]; then
20+
echo "$memory_limit_bytes" > "$CHILD_CGROUP/memory.max"
21+
fi
22+
23+
# Set CPU limit for the cgroup (0 = no limit)
24+
# cpu.max format: "QUOTA PERIOD" in microseconds
25+
# Example: "50000 100000" means 50% of one CPU core
26+
if [ "$cpu_limit_percent" -gt 0 ]; then
27+
CPU_PERIOD=100000
28+
CPU_QUOTA=$((cpu_limit_percent * CPU_PERIOD / 100))
29+
echo "$CPU_QUOTA $CPU_PERIOD" > "$CHILD_CGROUP/cpu.max"
30+
fi
31+
32+
# Set I/O limits for the cgroup (0 = no limit)
33+
# io.max format: "MAJOR:MINOR rbps=BYTES wbps=BYTES"
34+
if [ "$io_read_mbps" -gt 0 ] || [ "$io_write_mbps" -gt 0 ]; then
35+
# Get the major:minor of the actual block device
36+
# In Docker containers with overlay fs, we need to find the underlying block device
37+
# Note: cgroups v2 I/O limiting works on whole block devices, not partitions
38+
# So we use mmcblk0 (not mmcblk0p2), sda (not sda1), etc.
39+
ROOT_MAJOR=""
40+
ROOT_MINOR=""
41+
42+
# Try whole block devices (not partitions) - order matters for Raspberry Pi
43+
for DEV in /dev/mmcblk0 /dev/sda /dev/nvme0n1; do
44+
if [ -b "$DEV" ]; then
45+
ROOT_MAJOR=$(stat -c '%t' "$DEV" 2>/dev/null)
46+
ROOT_MINOR=$(stat -c '%T' "$DEV" 2>/dev/null)
47+
if [ -n "$ROOT_MAJOR" ] && [ -n "$ROOT_MINOR" ]; then
48+
# Convert from hex to decimal
49+
ROOT_MAJOR=$((16#$ROOT_MAJOR))
50+
ROOT_MINOR=$((16#$ROOT_MINOR))
51+
break
52+
fi
53+
fi
54+
done
55+
56+
# Skip I/O limiting if no valid block device found
57+
if [ -z "$ROOT_MAJOR" ] || [ -z "$ROOT_MINOR" ]; then
58+
echo "Warning: Could not find block device for I/O limiting"
59+
else
60+
IO_LIMIT_STR="$ROOT_MAJOR:$ROOT_MINOR"
61+
if [ "$io_read_mbps" -gt 0 ]; then
62+
IO_READ_BPS=$((io_read_mbps * 1024 * 1024))
63+
IO_LIMIT_STR="$IO_LIMIT_STR rbps=$IO_READ_BPS"
64+
fi
65+
if [ "$io_write_mbps" -gt 0 ]; then
66+
IO_WRITE_BPS=$((io_write_mbps * 1024 * 1024))
67+
IO_LIMIT_STR="$IO_LIMIT_STR wbps=$IO_WRITE_BPS"
68+
fi
69+
echo "$IO_LIMIT_STR" > "$CHILD_CGROUP/io.max"
70+
fi
71+
fi
72+
73+
# Succeed (exit status 0) when at least one resource limit (memory, CPU, I/O read or I/O write) is greater than 0
74+
has_any_limit() {
75+
[ "$memory_limit_bytes" -gt 0 ] || [ "$cpu_limit_percent" -gt 0 ] || \
76+
[ "$io_read_mbps" -gt 0 ] || [ "$io_write_mbps" -gt 0 ]
77+
}
1678

1779
# find PIDs for all children of a given process
1880
findpids() {
@@ -32,9 +94,9 @@ start_service() {
3294

3395
add_to_cgroup() {
3496
local pid=$1
35-
# Check if the process exists and memory limit is set
36-
if ! ps -p $pid > /dev/null || [ $memory_limit_bytes -eq 0 ]; then
37-
# process doesn't exist. presume it is already dead
97+
# Check if the process exists and any limit is set
98+
if ! ps -p $pid > /dev/null || ! has_any_limit; then
99+
# process doesn't exist or no limits set
38100
return
39101
fi
40102
echo $pid > $CHILD_CGROUP/cgroup.procs
@@ -61,12 +123,23 @@ start_service() {
61123
return $?
62124
}
63125

64-
# Continuously run the service, restarting if it stops or exceeds memory limit
126+
# Build a human-readable description of the enabled limits for logging ("none" when no limit is set)
127+
get_limits_description() {
128+
local desc=""
129+
[ "$memory_limit_mb" -gt 0 ] && desc="${desc}mem=${memory_limit_mb}MB "
130+
[ "$cpu_limit_percent" -gt 0 ] && desc="${desc}cpu=${cpu_limit_percent}% "
131+
[ "$io_read_mbps" -gt 0 ] && desc="${desc}io_r=${io_read_mbps}MB/s "
132+
[ "$io_write_mbps" -gt 0 ] && desc="${desc}io_w=${io_write_mbps}MB/s "
133+
[ -z "$desc" ] && desc="none"
134+
echo "$desc"
135+
}
136+
137+
# Continuously run the service, restarting if it stops or exceeds resource limits
65138
while true; do
66-
echo "Starting service: $service_command with memory limit: $memory_limit_bytes bytes "
139+
echo "Starting service: $service_command with limits: $(get_limits_description)"
67140
if ! start_service; then
68141
timestamp=$(date '+%Y-%m-%d %H:%M:%S')
69-
echo "$timestamp: Service ($service_command) exceeded memory limit or stopped. Restarting..." | tee -a "$LOG_FILE"
142+
echo "$timestamp: Service ($service_command) exceeded resource limit or stopped. Restarting..." | tee -a "$LOG_FILE"
70143
else
71144
echo "Service ($service_command) completed successfully."
72145
break

core/start-blueos-core

Lines changed: 64 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -107,37 +107,44 @@ find /usr/blueos/userdata -type f -exec chmod a+rw {} \;
107107
# and ~1min30s using this strategy.
108108
# From that 1min30s, the startup time is about ~25s, and originally, ~37s, meaning that the
109109
# remaining (~65 seconds) is the docker shutting down, and the Linux booting up.
110+
#
111+
# Service tuple format:
112+
# NAME,MEMORY_MB,CPU_PERCENT,IO_READ_MBPS,IO_WRITE_MBPS,COMMAND
113+
# - MEMORY_MB: Memory limit in MB (0 = no limit)
114+
# - CPU_PERCENT: CPU limit as percentage (100 = 1 core, 200 = 2 cores, 0 = no limit)
115+
# - IO_READ_MBPS: I/O read limit in MB/s (0 = no limit)
116+
# - IO_WRITE_MBPS: I/O write limit in MB/s (0 = no limit)
110117
PRIORITY_SERVICES=(
111-
'autopilot',0,"nice --19 $SERVICES_PATH/ardupilot_manager/main.py"
112-
'cable_guy',0,"$SERVICES_PATH/cable_guy/main.py"
113-
'video',0,"nice --19 mavlink-camera-manager --default-settings BlueROVUDP --mavlink tcpout:127.0.0.1:5777 --mavlink-system-id $MAV_SYSTEM_ID --mavlink-camera-component-id-range=100-105 --gst-feature-rank omxh264enc=0,v4l2h264enc=250,x264enc=260 --log-path /var/logs/blueos/services/mavlink-camera-manager --stun-server stun://stun.l.google.com:19302 --zenoh --verbose"
114-
'mavlink2rest',0,"mavlink2rest --connect=udpout:127.0.0.1:14001 --server [::]:6040 --system-id $MAV_SYSTEM_ID --component-id $MAV_COMPONENT_ID_ONBOARD_COMPUTER4"
118+
'autopilot',0,0,0,0,"nice --19 $SERVICES_PATH/ardupilot_manager/main.py"
119+
'cable_guy',0,0,0,0,"$SERVICES_PATH/cable_guy/main.py"
120+
'video',0,0,0,0,"nice --19 mavlink-camera-manager --default-settings BlueROVUDP --mavlink tcpout:127.0.0.1:5777 --mavlink-system-id $MAV_SYSTEM_ID --mavlink-camera-component-id-range=100-105 --gst-feature-rank omxh264enc=0,v4l2h264enc=250,x264enc=260 --log-path /var/logs/blueos/services/mavlink-camera-manager --stun-server stun://stun.l.google.com:19302 --zenoh --verbose"
121+
'mavlink2rest',0,0,0,0,"mavlink2rest --connect=udpout:127.0.0.1:14001 --server [::]:6040 --system-id $MAV_SYSTEM_ID --component-id $MAV_COMPONENT_ID_ONBOARD_COMPUTER4"
115122
)
116123

117124
SERVICES=(
118125
# These services are not prioritized because they are not fundamental for the vehicle to work
119-
'kraken',0,"nice -19 $BLUEOS_PYTHON_BIN_SECONDARY $SERVICES_PATH/kraken/main.py"
120-
'wifi',0,"nice -19 $SERVICES_PATH/wifi/main.py --socket wlan0"
121-
'zenohd',0,"ZENOH_BACKEND_FS_ROOT=$TOOLS_PATH/zenoh zenohd -c $TOOLS_PATH/zenoh/blueos-zenoh.json5"
126+
'kraken',0,0,0,0,"nice -19 $BLUEOS_PYTHON_BIN_SECONDARY $SERVICES_PATH/kraken/main.py"
127+
'wifi',0,0,0,0,"nice -19 $SERVICES_PATH/wifi/main.py --socket wlan0"
128+
'zenohd',0,0,0,0,"ZENOH_BACKEND_FS_ROOT=$TOOLS_PATH/zenoh zenohd -c $TOOLS_PATH/zenoh/blueos-zenoh.json5"
122129
# These services are not as important as the others
123-
'beacon',250,"$SERVICES_PATH/beacon/main.py"
124-
'bridget',0,"nice -19 $RUN_AS_REGULAR_USER_BEGIN $SERVICES_PATH/bridget/main.py $RUN_AS_REGULAR_USER_END"
125-
'commander',250,"$SERVICES_PATH/commander/main.py"
126-
'nmea_injector',250,"nice -19 $SERVICES_PATH/nmea_injector/main.py"
127-
'helper',250,"$BLUEOS_PYTHON_BIN_SECONDARY $SERVICES_PATH/helper/main.py"
128-
'iperf3',250," iperf3 --server --port 5201"
129-
'linux2rest',250,"linux2rest --log-settings netstat=30,platform=10,serial-ports=10,cpu=10,disk=30,info=10,memory=10,network=10,process=60,temperature=10,unix-time-seconds=10,usb=60"
130-
'filebrowser',250,"nice -19 filebrowser --database /etc/filebrowser/filebrowser.db --baseurl /file-browser"
131-
'versionchooser',0,"$BLUEOS_PYTHON_BIN_SECONDARY $SERVICES_PATH/versionchooser/main.py"
132-
'pardal',250,"nice -19 $SERVICES_PATH/pardal/main.py"
133-
'ping',0,"nice -19 $RUN_AS_REGULAR_USER_BEGIN $SERVICES_PATH/ping/main.py $RUN_AS_REGULAR_USER_END"
134-
'user_terminal',0,"cat /etc/motd"
135-
'ttyd',250,'nice -19 ttyd -p 8088 sh -c "/usr/bin/tmux attach -t user_terminal || /usr/bin/tmux new -s user_terminal"'
136-
'nginx',250,"nice -18 nginx -g \"daemon off;\" -c $TOOLS_PATH/nginx/nginx.conf"
137-
'bag_of_holding',250,"$SERVICES_PATH/bag_of_holding/main.py"
138-
'recorder',250,"blueos-recorder --recorder-path /usr/blueos/userdata/recorder"
139-
'recorder_extractor',250,"$SERVICES_PATH/recorder_extractor/main.py"
140-
'disk_usage',250,"$SERVICES_PATH/disk_usage/main.py"
130+
'beacon',250,0,0,0,"$SERVICES_PATH/beacon/main.py"
131+
'bridget',0,0,0,0,"nice -19 $RUN_AS_REGULAR_USER_BEGIN $SERVICES_PATH/bridget/main.py $RUN_AS_REGULAR_USER_END"
132+
'commander',250,0,0,0,"$SERVICES_PATH/commander/main.py"
133+
'nmea_injector',250,0,0,0,"nice -19 $SERVICES_PATH/nmea_injector/main.py"
134+
'helper',250,0,0,0,"$BLUEOS_PYTHON_BIN_SECONDARY $SERVICES_PATH/helper/main.py"
135+
'iperf3',250,0,0,0," iperf3 --server --port 5201"
136+
'linux2rest',250,0,0,0,"linux2rest --log-settings netstat=30,platform=10,serial-ports=10,cpu=10,disk=30,info=10,memory=10,network=10,process=60,temperature=10,unix-time-seconds=10,usb=60"
137+
'filebrowser',250,0,0,0,"nice -19 filebrowser --database /etc/filebrowser/filebrowser.db --baseurl /file-browser"
138+
'versionchooser',0,0,0,0,"$BLUEOS_PYTHON_BIN_SECONDARY $SERVICES_PATH/versionchooser/main.py"
139+
'pardal',250,0,0,0,"nice -19 $SERVICES_PATH/pardal/main.py"
140+
'ping',0,0,0,0,"nice -19 $RUN_AS_REGULAR_USER_BEGIN $SERVICES_PATH/ping/main.py $RUN_AS_REGULAR_USER_END"
141+
'user_terminal',0,0,0,0,"cat /etc/motd"
142+
'ttyd',250,0,0,0,'nice -19 ttyd -p 8088 sh -c "/usr/bin/tmux attach -t user_terminal || /usr/bin/tmux new -s user_terminal"'
143+
'nginx',250,0,0,0,"nice -18 nginx -g \"daemon off;\" -c $TOOLS_PATH/nginx/nginx.conf"
144+
'bag_of_holding',250,0,0,0,"$SERVICES_PATH/bag_of_holding/main.py"
145+
'recorder',250,0,0,0,"blueos-recorder --recorder-path /usr/blueos/userdata/recorder"
146+
'recorder_extractor',250,0,0,0,"$SERVICES_PATH/recorder_extractor/main.py"
147+
'disk_usage',250,0,0,0,"$SERVICES_PATH/disk_usage/main.py"
141148
)
142149

143150
tmux -f /etc/tmux.conf start-server
@@ -147,10 +154,29 @@ function create_service {
147154
SESSION_NAME="$1:0"
148155
SERVICE_NAME="$1"
149156
local command="$2" # Store the command as a string
150-
local memory_limit_mb=$3
157+
local memory_limit_mb=${3:-0}
158+
local cpu_limit_percent=${4:-0}
159+
local io_read_mbps=${5:-0}
160+
local io_write_mbps=${6:-0}
161+
162+
if [ -n "${BLUEOS_DISABLE_RESOURCE_LIMITS}" ]; then
163+
memory_limit_mb=0
164+
cpu_limit_percent=0
165+
io_read_mbps=0
166+
io_write_mbps=0
167+
fi
151168

152169
if [ -n "${BLUEOS_DISABLE_MEMORY_LIMIT}" ]; then
153-
memory_limit_mb=$TOTAL_RAM_MB
170+
memory_limit_mb=0
171+
fi
172+
173+
if [ -n "${BLUEOS_DISABLE_CPU_LIMIT}" ]; then
174+
cpu_limit_percent=0
175+
fi
176+
177+
if [ -n "${BLUEOS_DISABLE_IO_LIMIT}" ]; then
178+
io_read_mbps=0
179+
io_write_mbps=0
154180
fi
155181

156182
# Check if the service is disabled
@@ -159,16 +185,18 @@ function create_service {
159185
tmux send-keys -t $SESSION_NAME "echo 'Service $1 is disabled'; sleep infinity" C-m
160186
return
161187
fi
162-
echo "Service: $NAME: $EXECUTABLE with memory limit: $memory_limit_mb MB"
188+
echo "Service: $SERVICE_NAME: mem=${memory_limit_mb}MB cpu=${cpu_limit_percent}% io_r=${io_read_mbps}MB/s io_w=${io_write_mbps}MB/s"
163189

164190
# Set all necessary environment variables for the new tmux session
165191
for NAME in $(compgen -v | grep -e MAV_ -e BLUEOS_); do
166192
VALUE=${!NAME}
167193
tmux setenv -t "$SESSION_NAME" -g "$NAME" "$VALUE"
168194
done
195+
# Pass DOCKER_CGROUP for cgroup path resolution in run-service
196+
tmux setenv -t "$SESSION_NAME" -g "DOCKER_CGROUP" "$DOCKER_CGROUP"
169197

170-
# Use run_service to start the service with the memory limit
171-
tmux send-keys -t $SESSION_NAME "run-service '$SERVICE_NAME' '$command' $memory_limit_mb " C-m
198+
# Use run-service to start the service with resource limits
199+
tmux send-keys -t $SESSION_NAME "run-service '$SERVICE_NAME' '$command' $memory_limit_mb $cpu_limit_percent $io_read_mbps $io_write_mbps" C-m
172200
}
173201

174202
SSH_USER=${SSH_USER:-pi}
@@ -220,24 +248,24 @@ prepare_cgroups() {
220248
cat $DOCKER_CGROUP_PATH/cgroup.procs
221249
fi
222250

223-
echo "Enabling subtree_control..."
224-
echo "+memory" > $DOCKER_CGROUP_PATH/cgroup.subtree_control && echo "subtree_control enabled"
251+
echo "Enabling subtree_control for memory, cpu, and io on container cgroup..."
252+
echo "+memory +cpu +io" > $DOCKER_CGROUP_PATH/cgroup.subtree_control && echo "subtree_control enabled on container cgroup"
225253
}
226254

227255
prepare_cgroups
228256

229257
echo "Starting high priority services.."
230258
for TUPLE in "${PRIORITY_SERVICES[@]}"; do
231-
IFS=',' read -r NAME MEMORY_LIMIT_MB EXECUTABLE <<< "$TUPLE"
232-
create_service "$NAME" "$EXECUTABLE" "$MEMORY_LIMIT_MB"
259+
IFS=',' read -r NAME MEMORY_MB CPU_PERCENT IO_READ_MBPS IO_WRITE_MBPS EXECUTABLE <<< "$TUPLE"
260+
create_service "$NAME" "$EXECUTABLE" "$MEMORY_MB" "$CPU_PERCENT" "$IO_READ_MBPS" "$IO_WRITE_MBPS"
233261
done
234262

235263
sleep 5
236264

237265
echo "Starting other services.."
238266
for TUPLE in "${SERVICES[@]}"; do
239-
IFS=',' read -r NAME MEMORY_LIMIT_MB EXECUTABLE <<< "$TUPLE"
240-
create_service "$NAME" "$EXECUTABLE" "$MEMORY_LIMIT_MB"
267+
IFS=',' read -r NAME MEMORY_MB CPU_PERCENT IO_READ_MBPS IO_WRITE_MBPS EXECUTABLE <<< "$TUPLE"
268+
create_service "$NAME" "$EXECUTABLE" "$MEMORY_MB" "$CPU_PERCENT" "$IO_READ_MBPS" "$IO_WRITE_MBPS"
241269
done
242270

243271
echo "BlueOS running!"

0 commit comments

Comments
 (0)