Skip to content

Commit ede1e47

Browse files
Merge pull request #115 from syphar/rustwide-memory-2
return memory.peak after calling sandboxed commands
2 parents 2421f85 + 35f5db7 commit ede1e47

4 files changed

Lines changed: 182 additions & 26 deletions

File tree

src/cmd/mod.rs

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -375,9 +375,9 @@ impl<'w> Command<'w, '_> {
375375

376376
/// Run the prepared command and return an error if it fails (for example with a non-zero exit
377377
/// code or a timeout).
378-
pub fn run(self) -> Result<(), CommandError> {
379-
self.run_inner(false)?;
380-
Ok(())
378+
pub fn run(self) -> Result<ProcessStatistics, CommandError> {
379+
let output = self.run_inner(false)?;
380+
Ok(output.statistics)
381381
}
382382

383383
/// Run the prepared command and return its output if it succeedes. If it fails (for example
@@ -551,15 +551,26 @@ impl From<InnerProcessOutput> for ProcessOutput {
551551
ProcessOutput {
552552
stdout: orig.stdout,
553553
stderr: orig.stderr,
554+
statistics: ProcessStatistics::default(),
554555
}
555556
}
556557
}
557558

559+
/// collected statistics about the process execution.
560+
#[derive(Default)]
561+
pub struct ProcessStatistics {
562+
/// peak memory usage in bytes.
563+
/// This is populated for sandboxed commands on systems
564+
/// with cgroups v1/v2.
565+
pub memory_peak: Option<u64>,
566+
}
567+
558568
/// Output of a [`Command`](struct.Command.html) when it was executed with the
559569
/// [`run_capture`](struct.Command.html#method.run_capture) method.
560570
pub struct ProcessOutput {
561571
stdout: Vec<String>,
562572
stderr: Vec<String>,
573+
statistics: ProcessStatistics,
563574
}
564575

565576
impl ProcessOutput {
@@ -572,6 +583,14 @@ impl ProcessOutput {
572583
pub fn stderr_lines(&self) -> &[String] {
573584
&self.stderr
574585
}
586+
587+
/// Return the peak memory usage in bytes of the sandbox container, if available.
588+
///
589+
/// This is populated for sandboxed commands on systems with cgroups v2. Returns `None` for
590+
/// non-sandboxed commands or when the metric could not be read.
591+
pub fn memory_peak_bytes(&self) -> Option<u64> {
592+
self.statistics.memory_peak
593+
}
575594
}
576595

577596
enum OutputKind {

src/cmd/sandbox.rs

Lines changed: 135 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use crate::Workspace;
2-
use crate::cmd::{Command, CommandError, ProcessLinesActions, ProcessOutput};
2+
use crate::cmd::{Command, CommandError, ProcessLinesActions, ProcessOutput, ProcessStatistics};
33
use log::{error, info};
44
use serde::Deserialize;
55
use std::error::Error;
@@ -229,6 +229,7 @@ impl SandboxBuilder {
229229
fn create(self, workspace: &Workspace) -> Result<Container<'_>, CommandError> {
230230
let mut args: Vec<String> = vec!["create".into()];
231231

232+
// Mounts are container-level config, always on `docker create`
232233
for mount in &self.mounts {
233234
std::fs::create_dir_all(&mount.host_path)?;
234235

@@ -243,16 +244,7 @@ impl SandboxBuilder {
243244
}
244245
}
245246

246-
for (var, value) in &self.env {
247-
args.push("-e".into());
248-
args.push(format! {"{var}={value}"})
249-
}
250-
251-
if let Some(workdir) = self.workdir {
252-
args.push("-w".into());
253-
args.push(workdir);
254-
}
255-
247+
// Resource limits and networking are container-level config
256248
if let Some(limit) = self.memory_limit {
257249
args.push("-m".into());
258250
args.push(limit.to_string());
@@ -263,11 +255,6 @@ impl SandboxBuilder {
263255
args.push(limit.to_string());
264256
}
265257

266-
if let Some(user) = self.user {
267-
args.push("--user".into());
268-
args.push(user);
269-
}
270-
271258
if !self.enable_networking {
272259
args.push("--network".into());
273260
args.push("none".into());
@@ -279,9 +266,10 @@ impl SandboxBuilder {
279266

280267
args.push(workspace.sandbox_image().name.clone());
281268

282-
for arg in self.cmd {
283-
args.push(arg);
284-
}
269+
// Use an idle command; the real command runs via `docker exec` so the container stays
270+
// alive after the command finishes, allowing us to read cgroup metrics.
271+
args.push("sleep".into());
272+
args.push("infinity".into());
285273

286274
let out = Command::new(workspace, "docker")
287275
.args(&args)
@@ -290,6 +278,10 @@ impl SandboxBuilder {
290278
Ok(Container {
291279
id: out.stdout_lines()[0].clone(),
292280
workspace,
281+
cmd: self.cmd,
282+
env: self.env,
283+
workdir: self.workdir,
284+
user: self.user,
293285
})
294286
}
295287

@@ -348,6 +340,11 @@ struct Container<'w> {
348340
// Docker container ID
349341
id: String,
350342
workspace: &'w Workspace,
343+
// Command-level config for `docker exec` (not baked into `docker create`)
344+
cmd: Vec<String>,
345+
env: Vec<(String, String)>,
346+
workdir: Option<String>,
347+
user: Option<String>,
351348
}
352349

353350
impl fmt::Display for Container<'_> {
@@ -370,6 +367,82 @@ impl Container<'_> {
370367
Ok(data.pop().unwrap())
371368
}
372369

370+
/// Start the container in detached mode (without `-a`).
371+
fn start(&self) -> Result<(), CommandError> {
372+
Command::new(self.workspace, "docker")
373+
.args(&["start", &self.id])
374+
.log_output(false)
375+
.run()
376+
.map(|_| ())
377+
}
378+
379+
/// Stop a running container. Uses `-t 1` to give `sleep infinity` a short grace period.
380+
fn stop(&self) -> Result<(), CommandError> {
381+
Command::new(self.workspace, "docker")
382+
.args(&["stop", "-t", "1", &self.id])
383+
.log_output(false)
384+
.run()
385+
.map(|_| ())
386+
}
387+
388+
/// Helper to `docker exec cat <path>` and return stdout lines on success.
389+
fn exec_cat_file(&self, path: &str) -> Option<Vec<String>> {
390+
Command::new(self.workspace, "docker")
391+
.args(&["exec", &self.id, "cat", path])
392+
.log_output(false)
393+
.log_command(false)
394+
.run_capture()
395+
.ok()
396+
.map(|o| o.stdout_lines().to_vec())
397+
}
398+
399+
/// Best-effort read of peak memory usage from the still-running container.
400+
/// Tries cgroups v2 first, then falls back to cgroups v1.
401+
fn read_memory_peak(&self) -> Option<u64> {
402+
let paths = [
403+
"/sys/fs/cgroup/memory.peak", // v2
404+
"/sys/fs/cgroup/memory/memory.max_usage_in_bytes", // v1
405+
];
406+
for path in paths {
407+
if let Some(val) = self
408+
.exec_cat_file(path)
409+
.and_then(|lines| lines.first()?.trim().parse::<u64>().ok())
410+
{
411+
return Some(val);
412+
}
413+
}
414+
None
415+
}
416+
417+
/// Check if any OOM kills occurred in the container's cgroup.
418+
///
419+
/// With the `docker exec` model, the OOM killer may only kill the exec'd process
420+
/// while `sleep infinity` (PID 1) survives. In that case `docker inspect` won't
421+
/// report `OOMKilled`, so we check the cgroup events directly.
422+
/// Tries cgroups v2 first, then falls back to cgroups v1.
423+
fn check_cgroup_oom(&self) -> bool {
424+
// Both v1 and v2 expose `oom_kill <count>` — just in different files.
425+
let paths = [
426+
"/sys/fs/cgroup/memory.events", // v2
427+
"/sys/fs/cgroup/memory/memory.oom_control", // v1
428+
];
429+
for path in paths {
430+
if let Some(lines) = self.exec_cat_file(path) {
431+
let found = lines.iter().any(|line| {
432+
line.strip_prefix("oom_kill ")
433+
.and_then(|rest| rest.trim().parse::<u64>().ok())
434+
.is_some_and(|count| count > 0)
435+
});
436+
if found {
437+
return true;
438+
}
439+
// File existed but no OOM — don't try the other version
440+
return false;
441+
}
442+
}
443+
false
444+
}
445+
373446
#[allow(clippy::type_complexity)]
374447
fn run(
375448
&self,
@@ -380,8 +453,32 @@ impl Container<'_> {
380453
log_command: bool,
381454
capture: bool,
382455
) -> Result<ProcessOutput, CommandError> {
456+
// Start the container in detached mode (runs `sleep infinity`)
457+
self.start()?;
458+
459+
// Build the `docker exec` command with env/workdir/user from the sandbox config
460+
let mut args: Vec<String> = vec!["exec".into()];
461+
462+
for (var, value) in &self.env {
463+
args.push("-e".into());
464+
args.push(format!("{var}={value}"));
465+
}
466+
467+
if let Some(ref workdir) = self.workdir {
468+
args.push("-w".into());
469+
args.push(workdir.clone());
470+
}
471+
472+
if let Some(ref user) = self.user {
473+
args.push("--user".into());
474+
args.push(user.clone());
475+
}
476+
477+
args.push(self.id.clone());
478+
args.extend(self.cmd.iter().cloned());
479+
383480
let mut cmd = Command::new(self.workspace, "docker")
384-
.args(&["start", "-a", &self.id])
481+
.args(&args)
385482
.timeout(timeout)
386483
.log_output(log_output)
387484
.log_command(log_command)
@@ -392,23 +489,39 @@ impl Container<'_> {
392489
}
393490

394491
let res = cmd.run_inner(capture);
492+
493+
// Read peak memory usage while the container is still running (best-effort)
494+
let memory_peak = self.read_memory_peak();
495+
496+
// Check OOM via cgroup events (catches cases where only the exec'd process
497+
// was killed, leaving the container's init process alive)
498+
let cgroup_oom = self.check_cgroup_oom();
499+
500+
// Explicitly stop the container now that we're done reading metrics.
501+
// The scopeguard will still call `docker rm -f` for final cleanup.
502+
let _ = self.stop();
503+
395504
let details = self.inspect()?;
396505

397506
// Return a different error if the container was killed due to an OOM
398-
if details.state.oom_killed {
507+
if details.state.oom_killed || cgroup_oom {
399508
Err(match res {
400509
Ok(_) | Err(CommandError::ExecutionFailed { .. }) => CommandError::SandboxOOM,
401510
Err(err) => err,
402511
})
403512
} else {
404-
res
513+
res.map(|mut output| {
514+
output.statistics = ProcessStatistics { memory_peak };
515+
output
516+
})
405517
}
406518
}
407519

408520
fn delete(&self) -> Result<(), CommandError> {
409521
Command::new(self.workspace, "docker")
410522
.args(&["rm", "-f", &self.id])
411523
.run()
524+
.map(|_| ())
412525
}
413526
}
414527

src/crates/git.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ impl CrateTrait for GitRepo {
101101
if private_repository && res.is_err() {
102102
Err(PrepareError::PrivateGitRepository.into())
103103
} else {
104-
Ok(res?)
104+
Ok(res.map(|_| ())?)
105105
}
106106
}
107107

tests/buildtest/mod.rs

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,30 @@ fn test_process_lines() {
163163
});
164164
}
165165

166+
#[test]
167+
#[cfg(not(windows))]
168+
fn test_memory_peak() {
169+
runner::run("hello-world", |run| {
170+
run.run(
171+
SandboxBuilder::new()
172+
.enable_networking(false)
173+
.memory_limit(Some(512 * 1024 * 1024)),
174+
|build| {
175+
let output = build.cargo().args(&["run"]).run_capture()?;
176+
if let Some(peak) = output.memory_peak_bytes() {
177+
assert!(peak > 0, "memory peak should be positive");
178+
assert!(
179+
peak < 512 * 1024 * 1024,
180+
"memory peak should be below the limit"
181+
);
182+
}
183+
Ok(())
184+
},
185+
)?;
186+
Ok(())
187+
});
188+
}
189+
166190
#[test]
167191
#[cfg(not(windows))]
168192
fn test_sandbox_oom() {

0 commit comments

Comments
 (0)