-
Notifications
You must be signed in to change notification settings - Fork 275
Expand file tree
/
Copy pathvmexec.swift
More file actions
271 lines (237 loc) · 9.86 KB
/
vmexec.swift
File metadata and controls
271 lines (237 loc) · 9.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
//===----------------------------------------------------------------------===//
// Copyright © 2025-2026 Apple Inc. and the Containerization project authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//===----------------------------------------------------------------------===//
/// NOTE: This binary implements a very small subset of the OCI runtime spec, mostly just
/// the process configurations. Mounts are somewhat functional, but masked and read only paths
/// aren't checked today. Today the namespaces are also ignored, and we always spawn a new pid
/// and mount namespace.
import ArgumentParser
import ContainerizationError
import ContainerizationOCI
import ContainerizationOS
import FoundationEssentials
import LCShim
import Logging
import Musl
import SystemPackage
@main
struct App: ParsableCommand {
static let ackPid = "AckPid"
static let ackConsole = "AckConsole"
static let configuration = CommandConfiguration(
commandName: "vmexec",
version: "0.1.0",
subcommands: [
ExecCommand.self,
RunCommand.self,
]
)
}
extension App {
/// Applies O_CLOEXEC to all file descriptors currently open for
/// the process except the stdio fd values
static func applyCloseExecOnFDs() throws {
let minFD = 2 // stdin, stdout, stderr should be preserved
let fdList = try FileManager.default.contentsOfDirectory(atPath: "/proc/self/fd")
for fdStr in fdList {
guard let fd = Int(fdStr) else {
continue
}
if fd <= minFD {
continue
}
_ = fcntl(Int32(fd), F_SETFD, FD_CLOEXEC)
}
}
static func exec(process: ContainerizationOCI.Process, currentEnv: [String]? = nil) throws {
guard !process.args.isEmpty else {
throw App.Errno(stage: "exec", info: "process args cannot be empty")
}
let executableArg = process.args[0]
let resolvedExecutable: URL
if executableArg.contains("/") {
if executableArg.hasPrefix("/") {
resolvedExecutable = URL(fileURLWithPath: executableArg)
} else {
resolvedExecutable = URL(fileURLWithPath: process.cwd).appendingPathComponent(executableArg).standardized
}
guard FileManager.default.fileExists(atPath: resolvedExecutable.path) else {
throw App.Failure(message: "failed to find target executable \(executableArg)")
}
} else {
let path = Path.findPath(currentEnv) ?? Path.getCurrentPath()
guard let found = Path.lookPath(executableArg, path: path) else {
throw App.Failure(message: "failed to find target executable \(executableArg)")
}
resolvedExecutable = found
}
let executable = strdup(resolvedExecutable.path)
var argv = process.args.map { strdup($0) }
argv += [nil]
let env = process.env.map { strdup($0) } + [nil]
let cwd = process.cwd
guard chdir(cwd) == 0 else {
throw App.Errno(stage: "chdir(cwd)", info: "failed to change directory to '\(cwd)'")
}
guard execvpe(executable, argv, env) != -1 else {
throw App.Errno(stage: "execvpe(\(String(describing: executable)))", info: "failed to exec [\(process.args.joined(separator: " "))]")
}
fatalError("execvpe failed")
}
static func setPermissions(user: ContainerizationOCI.User) throws {
if user.additionalGids.count > 0 {
guard setgroups(user.additionalGids.count, user.additionalGids) == 0 else {
throw App.Errno(stage: "setgroups()")
}
}
guard setgid(user.gid) == 0 else {
throw App.Errno(stage: "setgid()")
}
// NOTE: setuid has to be done last because once the uid has been
// changed, then the process will lose privilege to set the group
// and supplementary groups
guard setuid(user.uid) == 0 else {
throw App.Errno(stage: "setuid()")
}
}
static func fixStdioPerms(user: ContainerizationOCI.User) throws {
for i in 0...2 {
var fdStat = stat()
try withUnsafeMutablePointer(to: &fdStat) { pointer in
guard fstat(Int32(i), pointer) == 0 else {
throw App.Errno(stage: "fstat(fd)")
}
}
let desired = uid_t(user.uid)
if fdStat.st_uid != desired {
guard fchown(Int32(i), desired, fdStat.st_gid) != -1 else {
throw App.Errno(stage: "fchown(\(i))")
}
}
}
}
static func setRLimits(rlimits: [ContainerizationOCI.POSIXRlimit]) throws {
for rl in rlimits {
var limit = rlimit(rlim_cur: rl.soft, rlim_max: rl.hard)
let resource: Int32
switch rl.type {
case "RLIMIT_AS":
resource = RLIMIT_AS
case "RLIMIT_CORE":
resource = RLIMIT_CORE
case "RLIMIT_CPU":
resource = RLIMIT_CPU
case "RLIMIT_DATA":
resource = RLIMIT_DATA
case "RLIMIT_FSIZE":
resource = RLIMIT_FSIZE
case "RLIMIT_LOCKS":
resource = RLIMIT_LOCKS
case "RLIMIT_MEMLOCK":
resource = RLIMIT_MEMLOCK
case "RLIMIT_MSGQUEUE":
resource = RLIMIT_MSGQUEUE
case "RLIMIT_NICE":
resource = RLIMIT_NICE
case "RLIMIT_NOFILE":
resource = RLIMIT_NOFILE
case "RLIMIT_NPROC":
resource = RLIMIT_NPROC
case "RLIMIT_RSS":
resource = RLIMIT_RSS
case "RLIMIT_RTPRIO":
resource = RLIMIT_RTPRIO
case "RLIMIT_RTTIME":
resource = RLIMIT_RTTIME
case "RLIMIT_SIGPENDING":
resource = RLIMIT_SIGPENDING
case "RLIMIT_STACK":
resource = RLIMIT_STACK
default:
errno = EINVAL
throw App.Errno(stage: "rlimit key unknown")
}
guard setrlimit(resource, &limit) == 0 else {
throw App.Errno(stage: "setrlimit()")
}
}
}
static func prepareCapabilities(capabilities: ContainerizationOCI.LinuxCapabilities) throws -> ContainerizationOS.LinuxCapabilities? {
// Create capabilities instance from OCI config
var caps = ContainerizationOS.LinuxCapabilities()
caps.set(which: [.effective], caps: (capabilities.effective ?? []).compactMap { try? CapabilityName(rawValue: $0) })
caps.set(which: [.permitted], caps: (capabilities.permitted ?? []).compactMap { try? CapabilityName(rawValue: $0) })
caps.set(which: [.inheritable], caps: (capabilities.inheritable ?? []).compactMap { try? CapabilityName(rawValue: $0) })
caps.set(which: [.bounding], caps: (capabilities.bounding ?? []).compactMap { try? CapabilityName(rawValue: $0) })
caps.set(which: [.ambient], caps: (capabilities.ambient ?? []).compactMap { try? CapabilityName(rawValue: $0) })
// Apply bounding set BEFORE user change (drop capabilities early)
do {
try caps.apply(kind: .bounds)
} catch {
throw App.Failure(message: "failed to apply bounding set capabilities: \(error)")
}
// Set keep caps to preserve capabilities across setuid()
do {
try LinuxCapabilities.setKeepCaps()
} catch {
throw App.Failure(message: "failed to set keep caps: \(error)")
}
return caps
}
static func finishCapabilities(_ caps: ContainerizationOS.LinuxCapabilities?) throws {
guard let caps = caps else { return }
do {
try LinuxCapabilities.clearKeepCaps()
} catch {
throw App.Failure(message: "failed to clear keep caps: \(error)")
}
do {
try caps.apply(kind: [.caps])
} catch {
throw App.Failure(message: "failed to apply final capabilities: \(error)")
}
try? caps.apply(kind: [.ambs])
}
static func setNoNewPrivileges(process: ContainerizationOCI.Process) throws {
guard process.noNewPrivileges else { return }
guard CZ_prctl_set_no_new_privs() == 0 else {
throw App.Errno(stage: "prctl(PR_SET_NO_NEW_PRIVS)")
}
}
static func Errno(stage: String, info: String = "") -> ContainerizationError {
let posix = POSIXError(.init(rawValue: errno)!, userInfo: ["stage": stage])
return ContainerizationError(.internalError, message: "\(info) \(String(describing: posix))")
}
static func Failure(message: String) -> ContainerizationError {
ContainerizationError(
.internalError,
message: message
)
}
static func writeError(_ error: Error) {
let errorPipe = FileDescriptor(rawValue: 5)
let errorMessage: String
if let czError = error as? ContainerizationError {
errorMessage = czError.description
} else {
errorMessage = String(describing: error)
}
let bytes = Array(errorMessage.utf8)
_ = try? bytes.withUnsafeBytes { buffer in
try errorPipe.write(buffer)
}
try? errorPipe.close()
}
}