Skip to content
23 changes: 23 additions & 0 deletions Sources/Containerization/DNSConfiguration.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@
// limitations under the License.
//===----------------------------------------------------------------------===//

import ContainerizationError
import ContainerizationExtras

/// DNS configuration for a container. The values will be used to
/// construct /etc/resolv.conf for a given container.
public struct DNS: Sendable {
Expand Down Expand Up @@ -41,6 +44,26 @@ public struct DNS: Sendable {
self.searchDomains = searchDomains
self.options = options
}

/// Validates the DNS configuration.
///
/// Ensures that all nameserver entries are valid IPv4 or IPv6 addresses.
/// Arbitrary hostnames are not permitted as nameservers.
///
/// - Throws: ``ContainerizationError`` with code `.invalidArgument` if
/// any nameserver is not a valid IP address.
public func validate() throws {
for nameserver in nameservers {
let isValidIPv4 = (try? IPv4Address(nameserver)) != nil
let isValidIPv6 = (try? IPv6Address(nameserver)) != nil
if !isValidIPv4 && !isValidIPv6 {
throw ContainerizationError(
.invalidArgument,
message: "nameserver '\(nameserver)' is not a valid IPv4 or IPv6 address"
)
}
}
}
}

extension DNS {
Expand Down
6 changes: 2 additions & 4 deletions Sources/Containerization/Image/InitImage.swift
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,8 @@ extension InitImage {
var result = try writer.create(from: rootfs)
let layerDescriptor = Descriptor(mediaType: ContainerizationOCI.MediaTypes.imageLayerGzip, digest: result.digest.digestString, size: result.size)

// TODO: compute and fill in the correct diffID for the above layer
// We currently put in the sha of the fully compressed layer, this needs to be replaced with
// the sha of the uncompressed layer.
let rootfsConfig = ContainerizationOCI.Rootfs(type: "layers", diffIDs: [result.digest.digestString])
let diffID = try ContentWriter.diffID(of: rootfs)
let rootfsConfig = ContainerizationOCI.Rootfs(type: "layers", diffIDs: [diffID.digestString])
let runtimeConfig = ContainerizationOCI.ImageConfig(labels: labels)
let imageConfig = ContainerizationOCI.Image(architecture: platform.architecture, os: platform.os, config: runtimeConfig, rootfs: rootfsConfig)
result = try writer.create(from: imageConfig)
Expand Down
1 change: 1 addition & 0 deletions Sources/Containerization/Vminitd.swift
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,7 @@ extension Vminitd {

/// Configure DNS within the sandbox's environment.
public func configureDNS(config: DNS, location: String) async throws {
try config.validate()
_ = try await client.configureDns(
.with {
$0.location = location
Expand Down
177 changes: 177 additions & 0 deletions Sources/ContainerizationOCI/Content/ContentWriter.swift
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@
// limitations under the License.
//===----------------------------------------------------------------------===//

import Compression
import ContainerizationError
import Crypto
import Foundation
import NIOCore
import zlib

/// Provides a context to write data into a directory.
public class ContentWriter {
Expand Down Expand Up @@ -60,6 +62,175 @@ public class ContentWriter {
return try self.write(data)
}

    /// Computes the SHA256 digest of the uncompressed content of a gzip file.
    ///
    /// Per the OCI Image Specification, a DiffID is the SHA256 digest of the
    /// uncompressed layer content. This method streams the compressed file in
    /// chunks, decompresses through Apple's Compression framework, and feeds
    /// each decompressed chunk into an incremental SHA256 hasher. Neither the
    /// full compressed nor the full decompressed data is held in memory.
    ///
    /// NOTE(review): the gzip header must fit within the first 512 bytes read
    /// below; a header with an unusually long FEXTRA/FNAME/FCOMMENT field will
    /// be rejected as `invalidGzip` — confirm this is acceptable for all
    /// producers of these layers.
    ///
    /// - Parameter url: The URL of the gzip-compressed file.
    /// - Returns: The SHA256 digest of the uncompressed content.
    /// - Throws: `ContentWriterError.invalidGzip` if the header cannot be
    ///   parsed, `ContentWriterError.decompressionFailed` if the deflate
    ///   stream cannot be decoded, and `ContentWriterError.gzipTrailerMismatch`
    ///   if the trailer's CRC32/ISIZE do not match the decompressed output.
    public static func diffID(of url: URL) throws -> SHA256.Digest {
        let fileHandle = try FileHandle(forReadingFrom: url)
        defer { fileHandle.closeFile() }

        // Read just enough to parse the gzip header (initial 512 bytes is plenty).
        let headerReadSize = 512
        guard let headerData = Self.readExactly(fileHandle: fileHandle, count: headerReadSize), !headerData.isEmpty else {
            throw ContentWriterError.invalidGzip
        }
        let headerSize = try Self.gzipHeaderSize(headerData)

        // Read the gzip trailer (last 8 bytes) to validate CRC32 + ISIZE later.
        // Seek to the end to get the file size, then read the trailer.
        fileHandle.seekToEndOfFile()
        let fileSize = fileHandle.offsetInFile
        guard fileSize >= 8 else {
            throw ContentWriterError.gzipTrailerMismatch
        }
        fileHandle.seek(toFileOffset: fileSize - 8)
        guard let trailerData = Self.readExactly(fileHandle: fileHandle, count: 8) else {
            throw ContentWriterError.gzipTrailerMismatch
        }
        // Both trailer fields are little-endian per RFC 1952: CRC32 of the
        // uncompressed data, then ISIZE (uncompressed size mod 2^32).
        let expectedCRC = UInt32(trailerData[trailerData.startIndex])
            | (UInt32(trailerData[trailerData.startIndex + 1]) << 8)
            | (UInt32(trailerData[trailerData.startIndex + 2]) << 16)
            | (UInt32(trailerData[trailerData.startIndex + 3]) << 24)
        let expectedSize = UInt32(trailerData[trailerData.startIndex + 4])
            | (UInt32(trailerData[trailerData.startIndex + 5]) << 8)
            | (UInt32(trailerData[trailerData.startIndex + 6]) << 16)
            | (UInt32(trailerData[trailerData.startIndex + 7]) << 24)

        // Seek past the gzip header to the start of the deflate stream.
        // The deflate data spans from headerSize to fileSize - 8 (the last 8 bytes
        // are the gzip trailer: CRC32 + ISIZE). We must not feed the trailer to
        // the decompressor.
        fileHandle.seek(toFileOffset: UInt64(headerSize))
        var compressedBytesRemaining = Int(fileSize) - headerSize - 8
        guard compressedBytesRemaining >= 0 else {
            throw ContentWriterError.invalidGzip
        }

        // Set up the decompression stream.
        let chunkSize = 65_536
        let sourceBuffer = UnsafeMutablePointer<UInt8>.allocate(capacity: chunkSize)
        let destinationBuffer = UnsafeMutablePointer<UInt8>.allocate(capacity: chunkSize)
        defer {
            sourceBuffer.deallocate()
            destinationBuffer.deallocate()
        }

        let stream = UnsafeMutablePointer<compression_stream>.allocate(capacity: 1)
        defer { stream.deallocate() }

        // COMPRESSION_ZLIB in the Compression framework decodes a *raw* deflate
        // stream (no zlib wrapper), which is exactly what follows a gzip header.
        var status = compression_stream_init(stream, COMPRESSION_STREAM_DECODE, COMPRESSION_ZLIB)
        guard status != COMPRESSION_STATUS_ERROR else {
            throw ContentWriterError.decompressionFailed
        }
        defer { compression_stream_destroy(stream) }

        // Start with an empty source; we fill it from the file below.
        stream.pointee.src_ptr = UnsafePointer(sourceBuffer)
        stream.pointee.src_size = 0
        stream.pointee.dst_ptr = destinationBuffer
        stream.pointee.dst_size = chunkSize

        var hasher = SHA256()
        var runningCRC: uLong = crc32(0, nil, 0)  // zlib's canonical CRC32 seed
        var totalDecompressedSize: UInt64 = 0
        var inputExhausted = false

        while status != COMPRESSION_STATUS_END {
            // Refill the source buffer when it is exhausted and more data is available.
            if stream.pointee.src_size == 0 && !inputExhausted {
                let toRead = min(chunkSize, compressedBytesRemaining)
                if toRead > 0, let chunk = fileHandle.readData(ofLength: toRead) as Data?, !chunk.isEmpty {
                    compressedBytesRemaining -= chunk.count
                    chunk.copyBytes(to: sourceBuffer, count: chunk.count)
                    stream.pointee.src_ptr = UnsafePointer(sourceBuffer)
                    stream.pointee.src_size = chunk.count
                } else {
                    inputExhausted = true
                }
            }

            // Hand the decompressor a fresh, full-size output window every round.
            stream.pointee.dst_ptr = destinationBuffer
            stream.pointee.dst_size = chunkSize

            // Once the input is fully consumed, FINALIZE tells the decoder to
            // flush its remaining output and report COMPRESSION_STATUS_END.
            let flags: Int32 = inputExhausted ? Int32(COMPRESSION_STREAM_FINALIZE.rawValue) : 0
            status = compression_stream_process(stream, flags)

            switch status {
            case COMPRESSION_STATUS_OK, COMPRESSION_STATUS_END:
                // Bytes produced this round = window size minus remaining space.
                let produced = chunkSize - stream.pointee.dst_size
                if produced > 0 {
                    let buf = UnsafeBufferPointer(start: destinationBuffer, count: produced)
                    hasher.update(bufferPointer: UnsafeRawBufferPointer(buf))
                    runningCRC = crc32(runningCRC, destinationBuffer, uInt(produced))
                    totalDecompressedSize += UInt64(produced)
                }

            default:
                throw ContentWriterError.decompressionFailed
            }
        }

        // Validate the gzip trailer. ISIZE is defined as the uncompressed size
        // mod 2^32, so the truncating conversion matches the on-disk semantics.
        let actualCRC = UInt32(truncatingIfNeeded: runningCRC)
        let actualSize = UInt32(truncatingIfNeeded: totalDecompressedSize)

        guard expectedCRC == actualCRC, expectedSize == actualSize else {
            throw ContentWriterError.gzipTrailerMismatch
        }

        return hasher.finalize()
    }

/// Reads exactly `count` bytes from a FileHandle, returning nil on failure.
private static func readExactly(fileHandle: FileHandle, count: Int) -> Data? {
let data = fileHandle.readData(ofLength: count)
return data.isEmpty ? nil : data
}

/// Parses the gzip header to determine where the raw deflate stream begins.
private static func gzipHeaderSize(_ data: Data) throws -> Int {
guard data.count >= 10,
data[data.startIndex] == 0x1f,
data[data.startIndex + 1] == 0x8b,
data[data.startIndex + 2] == 0x08 // CM must be 8 (deflate) per RFC 1952
else {
throw ContentWriterError.invalidGzip
}

let start = data.startIndex
let flags = data[start + 3]
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question: what is the reason the current changes skipped the compression method (CM) check entirely (ref https://datatracker.ietf.org/doc/html/rfc1952#page-5)?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch — the header parser was only checking the magic bytes (1f 8b) but not the compression method. I've added a guard for CM == 0x08 (deflate), which is the only method defined by RFC 1952. Anything else will now throw invalidGzip.

var offset = 10

// FEXTRA
if flags & 0x04 != 0 {
guard data.count >= offset + 2 else { throw ContentWriterError.invalidGzip }
let extraLen = Int(data[start + offset]) | (Int(data[start + offset + 1]) << 8)
offset += 2 + extraLen
}
// FNAME
if flags & 0x08 != 0 {
while offset < data.count && data[start + offset] != 0 { offset += 1 }
offset += 1
}
// FCOMMENT
if flags & 0x10 != 0 {
while offset < data.count && data[start + offset] != 0 { offset += 1 }
offset += 1
}
// FHCRC
if flags & 0x02 != 0 { offset += 2 }

guard offset < data.count else { throw ContentWriterError.invalidGzip }
return offset
}

/// Encodes the passed in type as a JSON blob and writes it to the base path.
/// - Parameters:
/// - content: The type to convert to JSON.
Expand All @@ -69,3 +240,9 @@ public class ContentWriter {
return try self.write(data)
}
}

/// Errors thrown while computing a gzip layer's uncompressed digest.
enum ContentWriterError: Error {
    /// The data is not a well-formed gzip stream: bad magic bytes, a
    /// compression method other than deflate, a malformed/truncated header,
    /// or a file too small to hold both header and trailer.
    case invalidGzip
    /// The Compression framework failed to initialize or to decode the
    /// deflate stream.
    case decompressionFailed
    /// The CRC32 or ISIZE recorded in the gzip trailer does not match the
    /// decompressed content, or the trailer could not be read.
    case gzipTrailerMismatch
}
Loading