diff --git a/Sources/Containerization/LinuxProcessConfiguration.swift b/Sources/Containerization/LinuxProcessConfiguration.swift index 2644ccde..8ca9a2e4 100644 --- a/Sources/Containerization/LinuxProcessConfiguration.swift +++ b/Sources/Containerization/LinuxProcessConfiguration.swift @@ -372,6 +372,8 @@ public struct LinuxProcessConfiguration: Sendable { public var user: ContainerizationOCI.User = .init() /// The rlimits for the container process. public var rlimits: [LinuxRLimit] = [] + /// Prevents the process from gaining additional privileges. + public var noNewPrivileges: Bool = false /// The Linux capabilities for the container process. public var capabilities: LinuxCapabilities = .allCapabilities /// Whether to allocate a pseudo terminal for the process. If you'd like interactive @@ -437,6 +439,7 @@ public struct LinuxProcessConfiguration: Sendable { args: self.arguments, cwd: self.workingDirectory, env: self.environmentVariables, + noNewPrivileges: self.noNewPrivileges, capabilities: self.capabilities.toOCI(), user: self.user, rlimits: self.rlimits.map { $0.toOCI() }, diff --git a/vminitd/Sources/LCShim/include/syscall.h b/vminitd/Sources/LCShim/include/syscall.h index 52f60da3..5e9f8a70 100644 --- a/vminitd/Sources/LCShim/include/syscall.h +++ b/vminitd/Sources/LCShim/include/syscall.h @@ -23,6 +23,8 @@ int CZ_pivot_root(const char *new_root, const char *put_old); int CZ_set_sub_reaper(); +int CZ_set_no_new_privs(); + #ifndef SYS_pidfd_open #define SYS_pidfd_open 434 #endif diff --git a/vminitd/Sources/LCShim/syscall.c b/vminitd/Sources/LCShim/syscall.c index 4137aa84..b289456f 100644 --- a/vminitd/Sources/LCShim/syscall.c +++ b/vminitd/Sources/LCShim/syscall.c @@ -26,6 +26,8 @@ int CZ_pivot_root(const char *new_root, const char *put_old) { int CZ_set_sub_reaper() { return prctl(PR_SET_CHILD_SUBREAPER, 1); } +int CZ_set_no_new_privs() { return prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); } + int CZ_pidfd_open(pid_t pid, unsigned int flags) { // Musl doesn't have pidfd_open. return syscall(SYS_pidfd_open, pid, flags); diff --git a/vminitd/Sources/vmexec/ExecCommand.swift b/vminitd/Sources/vmexec/ExecCommand.swift index e31b62e0..b1b2feef 100644 --- a/vminitd/Sources/vmexec/ExecCommand.swift +++ b/vminitd/Sources/vmexec/ExecCommand.swift @@ -133,18 +133,23 @@ struct ExecCommand: ParsableCommand { try App.applyCloseExecOnFDs() try App.setRLimits(rlimits: process.rlimits) - // Prepare capabilities (before user change) + // Prepare capabilities (before user change). let preparedCaps = try App.prepareCapabilities(capabilities: process.capabilities ?? ContainerizationOCI.LinuxCapabilities()) // Change stdio to be owned by the requested user. try App.fixStdioPerms(user: process.user) - // Set uid, gid, and supplementary groups + // Set uid, gid, and supplementary groups. try App.setPermissions(user: process.user) - // Finish capabilities (after user change) + // Finish capabilities (after user change). try App.finishCapabilities(preparedCaps) + // Set no_new_privs (after user/capability changes). + if process.noNewPrivileges { + try App.setNoNewPrivs() + } + try App.exec(process: process, currentEnv: process.env) } else { // parent process // Send our child's pid to our parent before we exit. diff --git a/vminitd/Sources/vmexec/RunCommand.swift b/vminitd/Sources/vmexec/RunCommand.swift index 7759a802..1e83da5c 100644 --- a/vminitd/Sources/vmexec/RunCommand.swift +++ b/vminitd/Sources/vmexec/RunCommand.swift @@ -167,7 +167,7 @@ struct RunCommand: ParsableCommand { try App.setRLimits(rlimits: process.rlimits) - // Prepare capabilities (before user change) + // Prepare capabilities (before user change). let preparedCaps = try App.prepareCapabilities(capabilities: process.capabilities ?? ContainerizationOCI.LinuxCapabilities()) // Change stdio to be owned by the requested user. @@ -176,9 +176,14 @@ struct RunCommand: ParsableCommand { // Set uid, gid, and supplementary groups. try App.setPermissions(user: process.user) - // Finish capabilities (after user change) + // Finish capabilities (after user change). try App.finishCapabilities(preparedCaps) + // Set no_new_privs (after user/capability changes). + if process.noNewPrivileges { + try App.setNoNewPrivs() + } + // Finally execve the container process. try App.exec(process: process, currentEnv: process.env) } diff --git a/vminitd/Sources/vmexec/vmexec.swift b/vminitd/Sources/vmexec/vmexec.swift index 5643cf70..89b01877 100644 --- a/vminitd/Sources/vmexec/vmexec.swift +++ b/vminitd/Sources/vmexec/vmexec.swift @@ -106,6 +106,12 @@ extension App { fatalError("execvpe failed") } + static func setNoNewPrivs() throws { + guard CZ_set_no_new_privs() == 0 else { + throw App.Errno(stage: "prctl(PR_SET_NO_NEW_PRIVS)") + } + } + static func setPermissions(user: ContainerizationOCI.User) throws { if user.additionalGids.count > 0 { guard setgroups(user.additionalGids.count, user.additionalGids) == 0 else {