return memory.peak after calling sandboxed commands

syphar · syphar · commit 35f5db7701dc · 2026-02-11T06:04:26.000+01:00
diff --git a/src/cmd/mod.rs b/src/cmd/mod.rs
@@ -375,9 +375,9 @@ impl<'w> Command<'w, '_> {
 
     /// Run the prepared command and return an error if it fails (for example with a non-zero exit
     /// code or a timeout).
-    pub fn run(self) -> Result<(), CommandError> {
-        self.run_inner(false)?;
-        Ok(())
+    pub fn run(self) -> Result<ProcessStatistics, CommandError> {
+        let output = self.run_inner(false)?;
+        Ok(output.statistics)
     }
 
     /// Run the prepared command and return its output if it succeedes. If it fails (for example
@@ -551,15 +551,26 @@ impl From<InnerProcessOutput> for ProcessOutput {
         ProcessOutput {
             stdout: orig.stdout,
             stderr: orig.stderr,
+            statistics: ProcessStatistics::default(),
         }
     }
 }
 
+/// collected statistics about the process execution.
+#[derive(Default)]
+pub struct ProcessStatistics {
+    /// peak memory usage in bytes.
+    /// This is populated for sandboxed commands on systems
+    /// with cgroups v1/v2.
+    pub memory_peak: Option<u64>,
+}
+
 /// Output of a [`Command`](struct.Command.html) when it was executed with the
 /// [`run_capture`](struct.Command.html#method.run_capture) method.
 pub struct ProcessOutput {
     stdout: Vec<String>,
     stderr: Vec<String>,
+    statistics: ProcessStatistics,
 }
 
 impl ProcessOutput {
@@ -572,6 +583,14 @@ impl ProcessOutput {
     pub fn stderr_lines(&self) -> &[String] {
         &self.stderr
     }
+
+    /// Return the peak memory usage in bytes of the sandbox container, if available.
+    ///
+    /// This is populated for sandboxed commands on systems with cgroups v2. Returns `None` for
+    /// non-sandboxed commands or when the metric could not be read.
+    pub fn memory_peak_bytes(&self) -> Option<u64> {
+        self.statistics.memory_peak
+    }
 }
 
 enum OutputKind {
diff --git a/src/cmd/sandbox.rs b/src/cmd/sandbox.rs
@@ -1,5 +1,5 @@
 use crate::Workspace;
-use crate::cmd::{Command, CommandError, ProcessLinesActions, ProcessOutput};
+use crate::cmd::{Command, CommandError, ProcessLinesActions, ProcessOutput, ProcessStatistics};
 use log::{error, info};
 use serde::Deserialize;
 use std::error::Error;
@@ -226,6 +226,7 @@ impl SandboxBuilder {
     fn create(self, workspace: &Workspace) -> Result<Container<'_>, CommandError> {
         let mut args: Vec<String> = vec!["create".into()];
 
+        // Mounts are container-level config, always on `docker create`
         for mount in &self.mounts {
             std::fs::create_dir_all(&mount.host_path)?;
 
@@ -240,16 +241,7 @@ impl SandboxBuilder {
             }
         }
 
-        for (var, value) in &self.env {
-            args.push("-e".into());
-            args.push(format! {"{var}={value}"})
-        }
-
-        if let Some(workdir) = self.workdir {
-            args.push("-w".into());
-            args.push(workdir);
-        }
-
+        // Resource limits and networking are container-level config
         if let Some(limit) = self.memory_limit {
             args.push("-m".into());
             args.push(limit.to_string());
@@ -260,11 +252,6 @@ impl SandboxBuilder {
             args.push(limit.to_string());
         }
 
-        if let Some(user) = self.user {
-            args.push("--user".into());
-            args.push(user);
-        }
-
         if !self.enable_networking {
             args.push("--network".into());
             args.push("none".into());
@@ -276,9 +263,10 @@ impl SandboxBuilder {
 
         args.push(workspace.sandbox_image().name.clone());
 
-        for arg in self.cmd {
-            args.push(arg);
-        }
+        // Use an idle command; the real command runs via `docker exec` so the container stays
+        // alive after the command finishes, allowing us to read cgroup metrics.
+        args.push("sleep".into());
+        args.push("infinity".into());
 
         let out = Command::new(workspace, "docker")
             .args(&args)
@@ -287,6 +275,10 @@ impl SandboxBuilder {
         Ok(Container {
             id: out.stdout_lines()[0].clone(),
             workspace,
+            cmd: self.cmd,
+            env: self.env,
+            workdir: self.workdir,
+            user: self.user,
         })
     }
 
@@ -345,6 +337,11 @@ struct Container<'w> {
     // Docker container ID
     id: String,
     workspace: &'w Workspace,
+    // Command-level config for `docker exec` (not baked into `docker create`)
+    cmd: Vec<String>,
+    env: Vec<(String, String)>,
+    workdir: Option<String>,
+    user: Option<String>,
 }
 
 impl fmt::Display for Container<'_> {
@@ -367,6 +364,82 @@ impl Container<'_> {
         Ok(data.pop().unwrap())
     }
 
+    /// Start the container in detached mode (without `-a`).
+    fn start(&self) -> Result<(), CommandError> {
+        Command::new(self.workspace, "docker")
+            .args(&["start", &self.id])
+            .log_output(false)
+            .run()
+            .map(|_| ())
+    }
+
+    /// Stop a running container. Uses `-t 1` to give `sleep infinity` a short grace period.
+    fn stop(&self) -> Result<(), CommandError> {
+        Command::new(self.workspace, "docker")
+            .args(&["stop", "-t", "1", &self.id])
+            .log_output(false)
+            .run()
+            .map(|_| ())
+    }
+
+    /// Helper to `docker exec cat <path>` and return stdout lines on success.
+    fn exec_cat_file(&self, path: &str) -> Option<Vec<String>> {
+        Command::new(self.workspace, "docker")
+            .args(&["exec", &self.id, "cat", path])
+            .log_output(false)
+            .log_command(false)
+            .run_capture()
+            .ok()
+            .map(|o| o.stdout_lines().to_vec())
+    }
+
+    /// Best-effort read of peak memory usage from the still-running container.
+    /// Tries cgroups v2 first, then falls back to cgroups v1.
+    fn read_memory_peak(&self) -> Option<u64> {
+        let paths = [
+            "/sys/fs/cgroup/memory.peak",                      // v2
+            "/sys/fs/cgroup/memory/memory.max_usage_in_bytes", // v1
+        ];
+        for path in paths {
+            if let Some(val) = self
+                .exec_cat_file(path)
+                .and_then(|lines| lines.first()?.trim().parse::<u64>().ok())
+            {
+                return Some(val);
+            }
+        }
+        None
+    }
+
+    /// Check if any OOM kills occurred in the container's cgroup.
+    ///
+    /// With the `docker exec` model, the OOM killer may only kill the exec'd process
+    /// while `sleep infinity` (PID 1) survives. In that case `docker inspect` won't
+    /// report `OOMKilled`, so we check the cgroup events directly.
+    /// Tries cgroups v2 first, then falls back to cgroups v1.
+    fn check_cgroup_oom(&self) -> bool {
+        // Both v1 and v2 expose `oom_kill <count>` — just in different files.
+        let paths = [
+            "/sys/fs/cgroup/memory.events",             // v2
+            "/sys/fs/cgroup/memory/memory.oom_control", // v1
+        ];
+        for path in paths {
+            if let Some(lines) = self.exec_cat_file(path) {
+                let found = lines.iter().any(|line| {
+                    line.strip_prefix("oom_kill ")
+                        .and_then(|rest| rest.trim().parse::<u64>().ok())
+                        .is_some_and(|count| count > 0)
+                });
+                if found {
+                    return true;
+                }
+                // File existed but no OOM — don't try the other version
+                return false;
+            }
+        }
+        false
+    }
+
     #[allow(clippy::type_complexity)]
     fn run(
         &self,
@@ -377,8 +450,32 @@ impl Container<'_> {
         log_command: bool,
         capture: bool,
     ) -> Result<ProcessOutput, CommandError> {
+        // Start the container in detached mode (runs `sleep infinity`)
+        self.start()?;
+
+        // Build the `docker exec` command with env/workdir/user from the sandbox config
+        let mut args: Vec<String> = vec!["exec".into()];
+
+        for (var, value) in &self.env {
+            args.push("-e".into());
+            args.push(format!("{var}={value}"));
+        }
+
+        if let Some(ref workdir) = self.workdir {
+            args.push("-w".into());
+            args.push(workdir.clone());
+        }
+
+        if let Some(ref user) = self.user {
+            args.push("--user".into());
+            args.push(user.clone());
+        }
+
+        args.push(self.id.clone());
+        args.extend(self.cmd.iter().cloned());
+
         let mut cmd = Command::new(self.workspace, "docker")
-            .args(&["start", "-a", &self.id])
+            .args(&args)
             .timeout(timeout)
             .log_output(log_output)
             .log_command(log_command)
@@ -389,23 +486,39 @@ impl Container<'_> {
         }
 
         let res = cmd.run_inner(capture);
+
+        // Read peak memory usage while the container is still running (best-effort)
+        let memory_peak = self.read_memory_peak();
+
+        // Check OOM via cgroup events (catches cases where only the exec'd process
+        // was killed, leaving the container's init process alive)
+        let cgroup_oom = self.check_cgroup_oom();
+
+        // Explicitly stop the container now that we're done reading metrics.
+        // The scopeguard will still call `docker rm -f` for final cleanup.
+        let _ = self.stop();
+
         let details = self.inspect()?;
 
         // Return a different error if the container was killed due to an OOM
-        if details.state.oom_killed {
+        if details.state.oom_killed || cgroup_oom {
             Err(match res {
                 Ok(_) | Err(CommandError::ExecutionFailed { .. }) => CommandError::SandboxOOM,
                 Err(err) => err,
             })
         } else {
-            res
+            res.map(|mut output| {
+                output.statistics = ProcessStatistics { memory_peak };
+                output
+            })
         }
     }
 
     fn delete(&self) -> Result<(), CommandError> {
         Command::new(self.workspace, "docker")
             .args(&["rm", "-f", &self.id])
             .run()
+            .map(|_| ())
     }
 }
 
diff --git a/src/crates/git.rs b/src/crates/git.rs
@@ -101,7 +101,7 @@ impl CrateTrait for GitRepo {
         if private_repository && res.is_err() {
             Err(PrepareError::PrivateGitRepository.into())
         } else {
-            Ok(res?)
+            Ok(res.map(|_| ())?)
         }
     }
 
diff --git a/tests/buildtest/mod.rs b/tests/buildtest/mod.rs
@@ -163,6 +163,30 @@ fn test_process_lines() {
     });
 }
 
+#[test]
+#[cfg(not(windows))]
+fn test_memory_peak() {
+    runner::run("hello-world", |run| {
+        run.run(
+            SandboxBuilder::new()
+                .enable_networking(false)
+                .memory_limit(Some(512 * 1024 * 1024)),
+            |build| {
+                let output = build.cargo().args(&["run"]).run_capture()?;
+                if let Some(peak) = output.memory_peak_bytes() {
+                    assert!(peak > 0, "memory peak should be positive");
+                    assert!(
+                        peak < 512 * 1024 * 1024,
+                        "memory peak should be below the limit"
+                    );
+                }
+                Ok(())
+            },
+        )?;
+        Ok(())
+    });
+}
+
 #[test]
 #[cfg(not(windows))]
 fn test_sandbox_oom() {

Original file line number	Diff line number	Diff line change
`@@ -101,7 +101,7 @@ impl CrateTrait for GitRepo {`
`101`	`101`	`if private_repository && res.is_err() {`
`102`	`102`	`Err(PrepareError::PrivateGitRepository.into())`
`103`	`103`	`} else {`
`104`		`- Ok(res?)`
	`104`	`+ Ok(res.map(\|_\| ())?)`
`105`	`105`	`}`
`106`	`106`	`}`
`107`	`107`