Skip to content

Commit 35f5db7

Browse files
committed
return memory.peak after calling sandboxed commands
1 parent 319f2be commit 35f5db7

4 files changed

Lines changed: 182 additions & 26 deletions

File tree

src/cmd/mod.rs

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -375,9 +375,9 @@ impl<'w> Command<'w, '_> {
375375

376376
/// Run the prepared command and return an error if it fails (for example with a non-zero exit
377377
/// code or a timeout).
378-
pub fn run(self) -> Result<(), CommandError> {
379-
self.run_inner(false)?;
380-
Ok(())
378+
pub fn run(self) -> Result<ProcessStatistics, CommandError> {
379+
let output = self.run_inner(false)?;
380+
Ok(output.statistics)
381381
}
382382

383383
/// Run the prepared command and return its output if it succeedes. If it fails (for example
@@ -551,15 +551,26 @@ impl From<InnerProcessOutput> for ProcessOutput {
551551
ProcessOutput {
552552
stdout: orig.stdout,
553553
stderr: orig.stderr,
554+
statistics: ProcessStatistics::default(),
554555
}
555556
}
556557
}
557558

559+
/// collected statistics about the process execution.
560+
#[derive(Default)]
561+
pub struct ProcessStatistics {
562+
/// peak memory usage in bytes.
563+
/// This is populated for sandboxed commands on systems
564+
/// with cgroups v1/v2.
565+
pub memory_peak: Option<u64>,
566+
}
567+
558568
/// Output of a [`Command`](struct.Command.html) when it was executed with the
559569
/// [`run_capture`](struct.Command.html#method.run_capture) method.
560570
pub struct ProcessOutput {
561571
stdout: Vec<String>,
562572
stderr: Vec<String>,
573+
statistics: ProcessStatistics,
563574
}
564575

565576
impl ProcessOutput {
@@ -572,6 +583,14 @@ impl ProcessOutput {
572583
pub fn stderr_lines(&self) -> &[String] {
573584
&self.stderr
574585
}
586+
587+
/// Return the peak memory usage in bytes of the sandbox container, if available.
588+
///
589+
/// This is populated for sandboxed commands on systems with cgroups v2. Returns `None` for
590+
/// non-sandboxed commands or when the metric could not be read.
591+
pub fn memory_peak_bytes(&self) -> Option<u64> {
592+
self.statistics.memory_peak
593+
}
575594
}
576595

577596
enum OutputKind {

src/cmd/sandbox.rs

Lines changed: 135 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::Workspace;
2-
use crate::cmd::{Command, CommandError, ProcessLinesActions, ProcessOutput};
2+
use crate::cmd::{Command, CommandError, ProcessLinesActions, ProcessOutput, ProcessStatistics};
33
use log::{error, info};
44
use serde::Deserialize;
55
use std::error::Error;
@@ -226,6 +226,7 @@ impl SandboxBuilder {
226226
fn create(self, workspace: &Workspace) -> Result<Container<'_>, CommandError> {
227227
let mut args: Vec<String> = vec!["create".into()];
228228

229+
// Mounts are container-level config, always on `docker create`
229230
for mount in &self.mounts {
230231
std::fs::create_dir_all(&mount.host_path)?;
231232

@@ -240,16 +241,7 @@ impl SandboxBuilder {
240241
}
241242
}
242243

243-
for (var, value) in &self.env {
244-
args.push("-e".into());
245-
args.push(format! {"{var}={value}"})
246-
}
247-
248-
if let Some(workdir) = self.workdir {
249-
args.push("-w".into());
250-
args.push(workdir);
251-
}
252-
244+
// Resource limits and networking are container-level config
253245
if let Some(limit) = self.memory_limit {
254246
args.push("-m".into());
255247
args.push(limit.to_string());
@@ -260,11 +252,6 @@ impl SandboxBuilder {
260252
args.push(limit.to_string());
261253
}
262254

263-
if let Some(user) = self.user {
264-
args.push("--user".into());
265-
args.push(user);
266-
}
267-
268255
if !self.enable_networking {
269256
args.push("--network".into());
270257
args.push("none".into());
@@ -276,9 +263,10 @@ impl SandboxBuilder {
276263

277264
args.push(workspace.sandbox_image().name.clone());
278265

279-
for arg in self.cmd {
280-
args.push(arg);
281-
}
266+
// Use an idle command; the real command runs via `docker exec` so the container stays
267+
// alive after the command finishes, allowing us to read cgroup metrics.
268+
args.push("sleep".into());
269+
args.push("infinity".into());
282270

283271
let out = Command::new(workspace, "docker")
284272
.args(&args)
@@ -287,6 +275,10 @@ impl SandboxBuilder {
287275
Ok(Container {
288276
id: out.stdout_lines()[0].clone(),
289277
workspace,
278+
cmd: self.cmd,
279+
env: self.env,
280+
workdir: self.workdir,
281+
user: self.user,
290282
})
291283
}
292284

@@ -345,6 +337,11 @@ struct Container<'w> {
345337
// Docker container ID
346338
id: String,
347339
workspace: &'w Workspace,
340+
// Command-level config for `docker exec` (not baked into `docker create`)
341+
cmd: Vec<String>,
342+
env: Vec<(String, String)>,
343+
workdir: Option<String>,
344+
user: Option<String>,
348345
}
349346

350347
impl fmt::Display for Container<'_> {
@@ -367,6 +364,82 @@ impl Container<'_> {
367364
Ok(data.pop().unwrap())
368365
}
369366

367+
/// Start the container in detached mode (without `-a`).
368+
fn start(&self) -> Result<(), CommandError> {
369+
Command::new(self.workspace, "docker")
370+
.args(&["start", &self.id])
371+
.log_output(false)
372+
.run()
373+
.map(|_| ())
374+
}
375+
376+
/// Stop a running container. Uses `-t 1` to give `sleep infinity` a short grace period.
377+
fn stop(&self) -> Result<(), CommandError> {
378+
Command::new(self.workspace, "docker")
379+
.args(&["stop", "-t", "1", &self.id])
380+
.log_output(false)
381+
.run()
382+
.map(|_| ())
383+
}
384+
385+
/// Helper to `docker exec cat <path>` and return stdout lines on success.
386+
fn exec_cat_file(&self, path: &str) -> Option<Vec<String>> {
387+
Command::new(self.workspace, "docker")
388+
.args(&["exec", &self.id, "cat", path])
389+
.log_output(false)
390+
.log_command(false)
391+
.run_capture()
392+
.ok()
393+
.map(|o| o.stdout_lines().to_vec())
394+
}
395+
396+
/// Best-effort read of peak memory usage from the still-running container.
397+
/// Tries cgroups v2 first, then falls back to cgroups v1.
398+
fn read_memory_peak(&self) -> Option<u64> {
399+
let paths = [
400+
"/sys/fs/cgroup/memory.peak", // v2
401+
"/sys/fs/cgroup/memory/memory.max_usage_in_bytes", // v1
402+
];
403+
for path in paths {
404+
if let Some(val) = self
405+
.exec_cat_file(path)
406+
.and_then(|lines| lines.first()?.trim().parse::<u64>().ok())
407+
{
408+
return Some(val);
409+
}
410+
}
411+
None
412+
}
413+
414+
/// Check if any OOM kills occurred in the container's cgroup.
415+
///
416+
/// With the `docker exec` model, the OOM killer may only kill the exec'd process
417+
/// while `sleep infinity` (PID 1) survives. In that case `docker inspect` won't
418+
/// report `OOMKilled`, so we check the cgroup events directly.
419+
/// Tries cgroups v2 first, then falls back to cgroups v1.
420+
fn check_cgroup_oom(&self) -> bool {
421+
// Both v1 and v2 expose `oom_kill <count>` — just in different files.
422+
let paths = [
423+
"/sys/fs/cgroup/memory.events", // v2
424+
"/sys/fs/cgroup/memory/memory.oom_control", // v1
425+
];
426+
for path in paths {
427+
if let Some(lines) = self.exec_cat_file(path) {
428+
let found = lines.iter().any(|line| {
429+
line.strip_prefix("oom_kill ")
430+
.and_then(|rest| rest.trim().parse::<u64>().ok())
431+
.is_some_and(|count| count > 0)
432+
});
433+
if found {
434+
return true;
435+
}
436+
// File existed but no OOM — don't try the other version
437+
return false;
438+
}
439+
}
440+
false
441+
}
442+
370443
#[allow(clippy::type_complexity)]
371444
fn run(
372445
&self,
@@ -377,8 +450,32 @@ impl Container<'_> {
377450
log_command: bool,
378451
capture: bool,
379452
) -> Result<ProcessOutput, CommandError> {
453+
// Start the container in detached mode (runs `sleep infinity`)
454+
self.start()?;
455+
456+
// Build the `docker exec` command with env/workdir/user from the sandbox config
457+
let mut args: Vec<String> = vec!["exec".into()];
458+
459+
for (var, value) in &self.env {
460+
args.push("-e".into());
461+
args.push(format!("{var}={value}"));
462+
}
463+
464+
if let Some(ref workdir) = self.workdir {
465+
args.push("-w".into());
466+
args.push(workdir.clone());
467+
}
468+
469+
if let Some(ref user) = self.user {
470+
args.push("--user".into());
471+
args.push(user.clone());
472+
}
473+
474+
args.push(self.id.clone());
475+
args.extend(self.cmd.iter().cloned());
476+
380477
let mut cmd = Command::new(self.workspace, "docker")
381-
.args(&["start", "-a", &self.id])
478+
.args(&args)
382479
.timeout(timeout)
383480
.log_output(log_output)
384481
.log_command(log_command)
@@ -389,23 +486,39 @@ impl Container<'_> {
389486
}
390487

391488
let res = cmd.run_inner(capture);
489+
490+
// Read peak memory usage while the container is still running (best-effort)
491+
let memory_peak = self.read_memory_peak();
492+
493+
// Check OOM via cgroup events (catches cases where only the exec'd process
494+
// was killed, leaving the container's init process alive)
495+
let cgroup_oom = self.check_cgroup_oom();
496+
497+
// Explicitly stop the container now that we're done reading metrics.
498+
// The scopeguard will still call `docker rm -f` for final cleanup.
499+
let _ = self.stop();
500+
392501
let details = self.inspect()?;
393502

394503
// Return a different error if the container was killed due to an OOM
395-
if details.state.oom_killed {
504+
if details.state.oom_killed || cgroup_oom {
396505
Err(match res {
397506
Ok(_) | Err(CommandError::ExecutionFailed { .. }) => CommandError::SandboxOOM,
398507
Err(err) => err,
399508
})
400509
} else {
401-
res
510+
res.map(|mut output| {
511+
output.statistics = ProcessStatistics { memory_peak };
512+
output
513+
})
402514
}
403515
}
404516

405517
fn delete(&self) -> Result<(), CommandError> {
406518
Command::new(self.workspace, "docker")
407519
.args(&["rm", "-f", &self.id])
408520
.run()
521+
.map(|_| ())
409522
}
410523
}
411524

src/crates/git.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ impl CrateTrait for GitRepo {
101101
if private_repository && res.is_err() {
102102
Err(PrepareError::PrivateGitRepository.into())
103103
} else {
104-
Ok(res?)
104+
Ok(res.map(|_| ())?)
105105
}
106106
}
107107

tests/buildtest/mod.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,30 @@ fn test_process_lines() {
163163
});
164164
}
165165

166+
#[test]
167+
#[cfg(not(windows))]
168+
fn test_memory_peak() {
169+
runner::run("hello-world", |run| {
170+
run.run(
171+
SandboxBuilder::new()
172+
.enable_networking(false)
173+
.memory_limit(Some(512 * 1024 * 1024)),
174+
|build| {
175+
let output = build.cargo().args(&["run"]).run_capture()?;
176+
if let Some(peak) = output.memory_peak_bytes() {
177+
assert!(peak > 0, "memory peak should be positive");
178+
assert!(
179+
peak < 512 * 1024 * 1024,
180+
"memory peak should be below the limit"
181+
);
182+
}
183+
Ok(())
184+
},
185+
)?;
186+
Ok(())
187+
});
188+
}
189+
166190
#[test]
167191
#[cfg(not(windows))]
168192
fn test_sandbox_oom() {

0 commit comments

Comments
 (0)