From 3fda71b2da3c9cb6d73300e605677076e271d336 Mon Sep 17 00:00:00 2001
From: Noel
Date: Sat, 28 Feb 2026 21:25:54 +0100
Subject: [PATCH] feat(seccomp): add seccomp-bpf filter support with TSYNC

Add optional seccomp-bpf filter installation to ExecutableSpec. The
caller provides a filter program as BPF instruction tuples and
styrolite installs it at the correct point in the execution sequence --
after PR_SET_NO_NEW_PRIVS and capability setup, but before execvpe().

Uses seccomp(2) with SECCOMP_FILTER_FLAG_TSYNC instead of
prctl(PR_SET_SECCOMP) to synchronize the filter across all threads,
preventing a race where a pre-existing thread could call a blocked
syscall before the filter is applied.

The instruction count is validated before it is narrowed to the 16-bit
sock_fprog length, so an oversized program is rejected instead of being
silently truncated. A TSYNC synchronization failure (positive return
value) is reported separately from errno-based failures.

The seccomp field on ExecutableSpec is Optional and serde(default), so
existing configs without seccomp continue to work unchanged.
---
 src/config.rs  |  7 ++++
 src/lib.rs     |  1 +
 src/seccomp.rs | 84 +++++++++++++++++++++++++++++++++++++++++++++
 src/wrap.rs    | 11 ++++++
 4 files changed, 103 insertions(+)
 create mode 100644 src/seccomp.rs

diff --git a/src/config.rs b/src/config.rs
index fc2a6a0..9d0ccaf 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -1,5 +1,6 @@
 use crate::caps::CapabilityBit;
 use crate::namespace::Namespace;
+use crate::seccomp::SeccompFilter;
 use anyhow::{Result, bail};
 use libc::{gid_t, pid_t, uid_t};
 use serde::{Deserialize, Serialize};
@@ -74,6 +75,12 @@ pub struct ExecutableSpec {
     /// If `true`, sets `PR_SET_NO_NEW_PRIVS` before
     /// spawning the target executable.
     pub no_new_privs: bool,
+
+    /// An optional seccomp-bpf filter program. Applied after capabilities
+    /// are set and `PR_SET_NO_NEW_PRIVS` is enabled, but before `execvpe()`.
+    /// Requires `no_new_privs = true`.
+    #[serde(default)]
+    pub seccomp: Option<SeccompFilter>,
 }
 
 #[derive(Default, Debug, Serialize, Deserialize)]
diff --git a/src/lib.rs b/src/lib.rs
index fe2788f..8afabf8 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -4,6 +4,7 @@ pub mod config;
 pub mod mount;
 pub mod namespace;
 pub mod runner;
+pub mod seccomp;
 pub mod signal;
 pub mod unshare;
 pub mod wrap;
diff --git a/src/seccomp.rs b/src/seccomp.rs
new file mode 100644
index 0000000..9a5dccd
--- /dev/null
+++ b/src/seccomp.rs
@@ -0,0 +1,84 @@
+use std::io;
+
+/// `seccomp(2)` operation that installs a BPF filter program.
+const SECCOMP_SET_MODE_FILTER: libc::c_ulong = 1;
+
+/// `seccomp(2)` flag that applies the filter to every thread of the process.
+const SECCOMP_FILTER_FLAG_TSYNC: libc::c_ulong = 1;
+
+/// A seccomp-bpf filter program.
+///
+/// The caller builds the BPF program as a list of (code, jt, jf, k)
+/// instructions. Styrolite installs it via `seccomp(2)` after
+/// capabilities are set but before `execvpe()`.
+///
+/// Requires `no_new_privs = true` on the `ExecutableSpec`.
+#[derive(Clone, Debug, serde::Serialize, serde::Deserialize)]
+pub struct SeccompFilter {
+    /// BPF instructions as (code, jt, jf, k) tuples.
+    pub instructions: Vec<(u16, u8, u8, u32)>,
+}
+
+impl SeccompFilter {
+    /// Install the seccomp filter via `seccomp(2)` with `SECCOMP_FILTER_FLAG_TSYNC`.
+    ///
+    /// Uses `seccomp(2)` instead of `prctl(PR_SET_SECCOMP)` to synchronize the
+    /// filter across all threads via `SECCOMP_FILTER_FLAG_TSYNC`.
+    ///
+    /// # Errors
+    ///
+    /// Returns `InvalidInput` if the program is empty or has more instructions
+    /// than `sock_fprog::len` can represent, the OS error if `seccomp(2)`
+    /// fails, or a descriptive error if a thread could not be synchronized.
+    ///
+    /// # Safety
+    ///
+    /// Must be called after `prctl(PR_SET_NO_NEW_PRIVS, 1)` and before `execvpe()`.
+    /// The caller must ensure the BPF program is valid.
+    pub unsafe fn install(&self) -> io::Result<()> {
+        if self.instructions.is_empty() {
+            return Err(io::Error::new(
+                io::ErrorKind::InvalidInput,
+                "seccomp filter has no instructions",
+            ));
+        }
+        // A plain `as u16` cast would wrap and silently install a truncated filter.
+        let len = u16::try_from(self.instructions.len()).map_err(|_| {
+            io::Error::new(
+                io::ErrorKind::InvalidInput,
+                "seccomp filter has too many instructions",
+            )
+        })?;
+
+        let filters: Vec<libc::sock_filter> = self
+            .instructions
+            .iter()
+            .map(|&(code, jt, jf, k)| libc::sock_filter { code, jt, jf, k })
+            .collect();
+        let prog = libc::sock_fprog {
+            len,
+            // `sock_fprog::filter` is declared `*mut`; the program is only read.
+            filter: filters.as_ptr() as *mut _,
+        };
+
+        // SAFETY: `prog` points into `filters`, which outlives the syscall, and
+        // the arguments are register-sized as the raw syscall ABI expects.
+        let ret = unsafe {
+            libc::syscall(
+                libc::SYS_seccomp,
+                SECCOMP_SET_MODE_FILTER,
+                SECCOMP_FILTER_FLAG_TSYNC,
+                &prog as *const libc::sock_fprog,
+            )
+        };
+        match ret {
+            0 => Ok(()),
+            // With TSYNC a positive return is the ID of the thread that could
+            // not be synchronized; errno is not meaningful in that case.
+            tid if tid > 0 => Err(io::Error::other(format!(
+                "seccomp TSYNC failed: thread {tid} could not be synchronized"
+            ))),
+            _ => Err(io::Error::last_os_error()),
+        }
+    }
+}
diff --git a/src/wrap.rs b/src/wrap.rs
index 8f3d6aa..785a193 100644
--- a/src/wrap.rs
+++ b/src/wrap.rs
@@ -628,6 +628,17 @@ impl ExecutableSpec {
             self.set_no_new_privs()?;
         }
 
+        // Install seccomp-bpf filter if provided.
+        // Must be after set_no_new_privs (required for unprivileged seccomp)
+        // and before execvpe (filter applies to the exec'd process).
+        if let Some(ref seccomp) = self.seccomp {
+            unsafe {
+                if let Err(e) = seccomp.install() {
+                    bail!("failed to install seccomp filter: {e}");
+                }
+            }
+        }
+
         unsafe {
             if libc::execvpe(
                 program_cstring.as_ptr(),