diff --git a/Sources/Containerization/ContainerManager.swift b/Sources/Containerization/ContainerManager.swift index bbdbd8c0..7ff386eb 100644 --- a/Sources/Containerization/ContainerManager.swift +++ b/Sources/Containerization/ContainerManager.swift @@ -376,6 +376,7 @@ public struct ContainerManager: Sendable { /// - readOnly: Whether to mount the root filesystem as read-only. /// - networking: Whether to create a network interface for this container. Defaults to `true`. /// When `false`, no network resources are allocated and `releaseNetwork`/`delete` remain safe to call. + /// - progress: Optional handler for tracking rootfs unpacking progress. public mutating func create( _ id: String, reference: String, @@ -383,6 +384,7 @@ public struct ContainerManager: Sendable { writableLayerSizeInBytes: UInt64? = nil, readOnly: Bool = false, networking: Bool = true, + progress: ProgressHandler? = nil, configuration: (inout LinuxContainer.Configuration) throws -> Void ) async throws -> LinuxContainer { let image = try await imageStore.get(reference: reference, pull: true) @@ -393,6 +395,7 @@ public struct ContainerManager: Sendable { writableLayerSizeInBytes: writableLayerSizeInBytes, readOnly: readOnly, networking: networking, + progress: progress, configuration: configuration ) } @@ -407,6 +410,7 @@ public struct ContainerManager: Sendable { /// - readOnly: Whether to mount the root filesystem as read-only. /// - networking: Whether to create a network interface for this container. Defaults to `true`. /// When `false`, no network resources are allocated and `releaseNetwork`/`delete` remain safe to call. + /// - progress: Optional handler for tracking rootfs unpacking progress. public mutating func create( _ id: String, image: Image, @@ -414,6 +418,7 @@ public struct ContainerManager: Sendable { writableLayerSizeInBytes: UInt64? = nil, readOnly: Bool = false, networking: Bool = true, + progress: ProgressHandler? = nil, configuration: (inout LinuxContainer.Configuration) throws -> Void ) async throws -> LinuxContainer { let path = try createContainerRoot(id) @@ -421,7 +426,8 @@ public struct ContainerManager: Sendable { var rootfs = try await unpack( image: image, destination: path.appendingPathComponent("rootfs.ext4"), - size: rootfsSizeInBytes + size: rootfsSizeInBytes, + progress: progress ) if readOnly { rootfs.options.append("ro") @@ -511,10 +517,10 @@ public struct ContainerManager: Sendable { return path } - private func unpack(image: Image, destination: URL, size: UInt64) async throws -> Mount { + private func unpack(image: Image, destination: URL, size: UInt64, progress: ProgressHandler? = nil) async throws -> Mount { do { let unpacker = EXT4Unpacker(blockSizeInBytes: size) - return try await unpacker.unpack(image, for: .current, at: destination) + return try await unpacker.unpack(image, for: .current, at: destination, progress: progress) } catch let err as ContainerizationError { if err.code == .exists { return .block( diff --git a/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift b/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift index 4e588e8d..eb10722d 100644 --- a/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift +++ b/Sources/Containerization/Image/Unpacker/EXT4Unpacker.swift @@ -42,7 +42,7 @@ public struct EXT4Unpacker: Unpacker { archive: URL, compression: ContainerizationArchive.Filter, at path: URL - ) throws { + ) async throws { let cleanedPath = try prepareUnpackPath(path: path) let filesystem = try EXT4.Formatter( FilePath(cleanedPath), @@ -50,11 +50,10 @@ public struct EXT4Unpacker: Unpacker { ) defer { try? filesystem.close() } - try filesystem.unpack( + try await filesystem.unpack( source: archive, format: .paxRestricted, - compression: compression, - progress: nil + compression: compression ) } #endif @@ -84,27 +83,59 @@ public struct EXT4Unpacker: Unpacker { ) defer { try? filesystem.close() } + // Resolve layer paths upfront. When progress reporting is enabled and a layer + // uses zstd, decompress once so both the size-scanning pass and the unpack + // pass share the same decompressed file. + var resolvedLayers: [(file: URL, filter: ContainerizationArchive.Filter)] = [] + var decompressedFiles: [URL] = [] for layer in manifest.layers { try Task.checkCancellation() let content = try await image.getContent(digest: layer.digest) + let compression = try compressionFilter(for: layer.mediaType) + if progress != nil && compression == .zstd { + let decompressed = try ArchiveReader.decompressZstd(content.path) + decompressedFiles.append(decompressed) + resolvedLayers.append((file: decompressed, filter: .none)) + } else { + resolvedLayers.append((file: content.path, filter: compression)) + } + } + defer { + for file in decompressedFiles { + ArchiveReader.cleanUpDecompressedZstd(file) + } + } - let compression: ContainerizationArchive.Filter - switch layer.mediaType { - case MediaTypes.imageLayer, MediaTypes.dockerImageLayer: - compression = .none - case MediaTypes.imageLayerGzip, MediaTypes.dockerImageLayerGzip: - compression = .gzip - case MediaTypes.imageLayerZstd, MediaTypes.dockerImageLayerZstd: - compression = .zstd - default: - throw ContainerizationError(.unsupported, message: "media type \(layer.mediaType) not supported.") + if let progress { + var totalSize: Int64 = 0 + var totalItems: Int = 0 + for layer in resolvedLayers { + try Task.checkCancellation() + let totals = try EXT4.Formatter.scanArchiveHeaders( + format: .paxRestricted, filter: layer.filter, file: layer.file) + totalSize += totals.size + totalItems += totals.items } - try filesystem.unpack( - source: content.path, + var totalEvents: [ProgressEvent] = [] + if totalSize > 0 { + totalEvents.append(ProgressEvent(event: "add-total-size", value: totalSize)) + } + if totalItems > 0 { + totalEvents.append(ProgressEvent(event: "add-total-items", value: totalItems)) + } + if !totalEvents.isEmpty { + await progress(totalEvents) + } + } + + for resolved in resolvedLayers { + try Task.checkCancellation() + let reader = try ArchiveReader( format: .paxRestricted, - compression: compression, - progress: progress + filter: resolved.filter, + file: resolved.file ) + try await filesystem.unpack(reader: reader, progress: progress) } return .block( @@ -123,4 +154,19 @@ public struct EXT4Unpacker: Unpacker { } return blockPath } + + #if os(macOS) + private func compressionFilter(for mediaType: String) throws -> ContainerizationArchive.Filter { + switch mediaType { + case MediaTypes.imageLayer, MediaTypes.dockerImageLayer: + return .none + case MediaTypes.imageLayerGzip, MediaTypes.dockerImageLayerGzip: + return .gzip + case MediaTypes.imageLayerZstd, MediaTypes.dockerImageLayerZstd: + return .zstd + default: + throw ContainerizationError(.unsupported, message: "media type \(mediaType) not supported.") + } + } + #endif } diff --git a/Sources/ContainerizationArchive/ArchiveReader.swift b/Sources/ContainerizationArchive/ArchiveReader.swift index b353b209..6c2bfe54 100644 --- a/Sources/ContainerizationArchive/ArchiveReader.swift +++ b/Sources/ContainerizationArchive/ArchiveReader.swift @@ -125,7 +125,7 @@ public final class ArchiveReader { } /// Decompress a zstd file to a temporary location - private static func decompressZstd(_ source: URL) throws -> URL { + public static func decompressZstd(_ source: URL) throws -> URL { guard let tempDir = createTemporaryDirectory(baseName: "zstd-decompress") else { throw ArchiveError.failedToDetectFormat } @@ -148,13 +148,19 @@ public final class ArchiveReader { return tempFile } + /// Clean up the temporary directory created by `decompressZstd`. + /// The decompressed file is placed inside a unique temporary directory, + /// so removing that directory cleans up everything. + public static func cleanUpDecompressedZstd(_ file: URL) { + try? FileManager.default.removeItem(at: file.deletingLastPathComponent()) + } + deinit { archive_read_free(underlying) try? fileHandle?.close() - // Clean up temp decompressed file if let tempFile = tempDecompressedFile { - try? FileManager.default.removeItem(at: tempFile.deletingLastPathComponent()) + Self.cleanUpDecompressedZstd(tempFile) } } } diff --git a/Sources/ContainerizationEXT4/Formatter+Unpack.swift b/Sources/ContainerizationEXT4/Formatter+Unpack.swift index fee04cc3..09a159f3 100644 --- a/Sources/ContainerizationEXT4/Formatter+Unpack.swift +++ b/Sources/ContainerizationEXT4/Formatter+Unpack.swift @@ -25,7 +25,83 @@ private typealias Hardlinks = [FilePath: FilePath] extension EXT4.Formatter { /// Unpack the provided archive on to the ext4 filesystem. - public func unpack(reader: ArchiveReader, progress: ProgressHandler? = nil) throws { + public func unpack(reader: ArchiveReader, progress: ProgressHandler? = nil) async throws { + try await self.unpackEntries(reader: reader, progress: progress) + } + + /// Unpack an archive at the source URL on to the ext4 filesystem. + public func unpack( + source: URL, + format: ContainerizationArchive.Format = .paxRestricted, + compression: ContainerizationArchive.Filter = .gzip, + progress: ProgressHandler? = nil + ) async throws { + // For zstd, decompress once and reuse for both passes to avoid double decompression. + let fileToRead: URL + let readerFilter: ContainerizationArchive.Filter + var decompressedFile: URL? + if progress != nil && compression == .zstd { + let decompressed = try ArchiveReader.decompressZstd(source) + fileToRead = decompressed + readerFilter = .none + decompressedFile = decompressed + } else { + fileToRead = source + readerFilter = compression + } + defer { + if let decompressedFile { + ArchiveReader.cleanUpDecompressedZstd(decompressedFile) + } + } + + if let progress { + // First pass: scan headers to get totals (fast, metadata only) + let totals = try Self.scanArchiveHeaders(format: format, filter: readerFilter, file: fileToRead) + var totalEvents: [ProgressEvent] = [] + if totals.size > 0 { + totalEvents.append(ProgressEvent(event: "add-total-size", value: totals.size)) + } + if totals.items > 0 { + totalEvents.append(ProgressEvent(event: "add-total-items", value: totals.items)) + } + if !totalEvents.isEmpty { + await progress(totalEvents) + } + } + + // Unpack pass + let reader = try ArchiveReader( + format: format, + filter: readerFilter, + file: fileToRead + ) + try await self.unpackEntries(reader: reader, progress: progress) + } + + /// Scan archive headers to count the total number of bytes in regular files + /// and the total number of entries. + public static func scanArchiveHeaders( + format: ContainerizationArchive.Format, + filter: ContainerizationArchive.Filter, + file: URL + ) throws -> (size: Int64, items: Int) { + let reader = try ArchiveReader(format: format, filter: filter, file: file) + var totalSize: Int64 = 0 + var totalItems: Int = 0 + for (entry, _) in reader.makeStreamingIterator() { + try Task.checkCancellation() + guard entry.path != nil else { continue } + totalItems += 1 + if entry.fileType == .regular, entry.hardlink == nil, let size = entry.size { + totalSize += Int64(size) + } + } + return (size: totalSize, items: totalItems) + } + + /// Core unpack logic. When `progress` is nil the handler calls are skipped. + private func unpackEntries(reader: ArchiveReader, progress: ProgressHandler?) async throws { var hardlinks: Hardlinks = [:] // Allocate a single 128KiB reusable buffer for all files to minimize allocations // and reduce the number of read calls to libarchive. @@ -39,35 +115,33 @@ extension EXT4.Formatter { continue } - defer { - // Count the number of entries - if let progress { - Task { - await progress([ - ProgressEvent(event: "add-items", value: 1) - ]) - } - } - } - pathEntry = preProcessPath(s: pathEntry) let path = FilePath(pathEntry) if path.base.hasPrefix(".wh.") { if path.base == ".wh..wh..opq" { // whiteout directory try self.unlink(path: path.dir, directoryWhiteout: true) + if let progress { + await progress([ProgressEvent(event: "add-items", value: 1)]) + } continue } let startIndex = path.base.index(path.base.startIndex, offsetBy: ".wh.".count) let filePath = String(path.base[startIndex...]) let dir: FilePath = path.dir try self.unlink(path: dir.join(filePath)) + if let progress { + await progress([ProgressEvent(event: "add-items", value: 1)]) + } continue } if let hardlink = entry.hardlink { let hl = preProcessPath(s: hardlink) hardlinks[path] = FilePath(hl) + if let progress { + await progress([ProgressEvent(event: "add-items", value: 1)]) + } continue } let ts = FileTimestamps( @@ -84,13 +158,8 @@ extension EXT4.Formatter { uid: entry.owner, gid: entry.group, xattrs: entry.xattrs, fileBuffer: reusableBuffer) - // Count the size of files if let progress, let size = entry.size { - Task { - await progress([ - ProgressEvent(event: "add-size", value: Int64(size)) - ]) - } + await progress([ProgressEvent(event: "add-size", value: Int64(size))]) } case .symbolicLink: var symlinkTarget: FilePath? @@ -102,8 +171,15 @@ extension EXT4.Formatter { uid: entry.owner, gid: entry.group, xattrs: entry.xattrs) default: + if let progress { + await progress([ProgressEvent(event: "add-items", value: 1)]) + } continue } + + if let progress { + await progress([ProgressEvent(event: "add-items", value: 1)]) + } } guard hardlinks.acyclic else { throw UnpackError.circularLinks @@ -115,21 +191,6 @@ extension EXT4.Formatter { } } - /// Unpack an archive at the source URL on to the ext4 filesystem. - public func unpack( - source: URL, - format: ContainerizationArchive.Format = .paxRestricted, - compression: ContainerizationArchive.Filter = .gzip, - progress: ProgressHandler? = nil - ) throws { - let reader = try ArchiveReader( - format: format, - filter: compression, - file: source - ) - try self.unpack(reader: reader, progress: progress) - } - private func preProcessPath(s: String) -> String { var p = s if p.hasPrefix("./") { diff --git a/Sources/cctl/RootfsCommand.swift b/Sources/cctl/RootfsCommand.swift index 1ac10e93..cd5fd36f 100644 --- a/Sources/cctl/RootfsCommand.swift +++ b/Sources/cctl/RootfsCommand.swift @@ -96,7 +96,7 @@ extension Application { private func outputExt4(archive: URL, to path: URL) async throws { let unpacker = EXT4Unpacker(blockSizeInBytes: 256.mib()) - try unpacker.unpack(archive: archive, compression: .gzip, at: path) + try await unpacker.unpack(archive: archive, compression: .gzip, at: path) } private func outputImage(path: URL, reference: String) async throws { diff --git a/Tests/ContainerizationEXT4Tests/TestEXT4Unpacker.swift b/Tests/ContainerizationEXT4Tests/TestEXT4Unpacker.swift index eaf8f31e..1c22f54f 100644 --- a/Tests/ContainerizationEXT4Tests/TestEXT4Unpacker.swift +++ b/Tests/ContainerizationEXT4Tests/TestEXT4Unpacker.swift @@ -28,7 +28,7 @@ struct Ext4UnpackerTests { .appendingPathComponent("ext4.unpacked.oci.img.delme", isDirectory: false)) final class MockEXT4Unpacker { - static func Unpack(index: String, fsPath: FilePath) throws { + static func Unpack(index: String, fsPath: FilePath) async throws { let fs = try EXT4.Formatter(fsPath) let bundle = Bundle.module guard let indexPath = bundle.url(forResource: index, withExtension: nil) else { @@ -67,14 +67,14 @@ struct Ext4UnpackerTests { guard let layerPath = bundle.url(forResource: layerDigest, withExtension: nil) else { throw NSError(domain: "layer \(layerDigest) not found", code: 1) } - try fs.unpack(source: layerPath) + try await fs.unpack(source: layerPath) } try fs.close() } } - @Test func eXT4Unpacker() throws { - try MockEXT4Unpacker.Unpack(index: self.indexSHA, fsPath: self.fsPath) + @Test func eXT4Unpacker() async throws { + try await MockEXT4Unpacker.Unpack(index: self.indexSHA, fsPath: self.fsPath) let ext4 = try EXT4.EXT4Reader(blockDevice: self.fsPath) let children = try ext4.children(of: EXT4.RootInode) #expect( diff --git a/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift b/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift index 0407950a..b69dfd42 100644 --- a/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift +++ b/Tests/ContainerizationEXT4Tests/TestFormatterUnpack.swift @@ -18,6 +18,7 @@ #if os(macOS) import ContainerizationArchive +import ContainerizationExtras import Foundation import Testing import SystemPackage @@ -41,10 +42,20 @@ struct Tar2EXT4Test: ~Copyable { "extendedattribute.test": Data([15, 26, 54, 1, 2, 4, 6, 7, 7]), ] + let layerDir: URL + let layer1Path: URL + let layer2Path: URL + init() throws { + // Compute paths before any throwing code to satisfy ~Copyable initialization rules. + let layerDir = FileManager.default.uniqueTemporaryDirectory() + let layer1Path = layerDir.appendingPathComponent("layer1.tar.gz", isDirectory: false) + let layer2Path = layerDir.appendingPathComponent("layer2.tar.gz", isDirectory: false) + self.layerDir = layerDir + self.layer1Path = layer1Path + self.layer2Path = layer2Path + // create layer1 - let layer1Path = FileManager.default.uniqueTemporaryDirectory() - .appendingPathComponent("layer1.tar.gz", isDirectory: false) let layer1Archiver = try ArchiveWriter( configuration: ArchiveWriterConfiguration(format: .paxRestricted, filter: .gzip)) try layer1Archiver.open(file: layer1Path) @@ -56,8 +67,6 @@ struct Tar2EXT4Test: ~Copyable { try layer1Archiver.finishEncoding() // create layer2 - let layer2Path = FileManager.default.uniqueTemporaryDirectory() - .appendingPathComponent("layer2.tar.gz", isDirectory: false) let layer2Archiver = try ArchiveWriter( configuration: ArchiveWriterConfiguration(format: .paxRestricted, filter: .gzip)) try layer2Archiver.open(file: layer2Path) @@ -81,18 +90,22 @@ struct Tar2EXT4Test: ~Copyable { // a new layer overwriting over an existing layer try layer2Archiver.writeEntry(entry: WriteEntry.file(path: "/dir2/file1", permissions: 0o644), data: nil) try layer2Archiver.finishEncoding() + } - let unpacker = try EXT4.Formatter(fsPath) - try unpacker.unpack(source: layer1Path) - try unpacker.unpack(source: layer2Path) - try unpacker.close() + private func unpackLayers() async throws { + let formatter = try EXT4.Formatter(fsPath) + try await formatter.unpack(source: layer1Path) + try await formatter.unpack(source: layer2Path) + try formatter.close() } deinit { try? FileManager.default.removeItem(at: fsPath.url) + try? FileManager.default.removeItem(at: layerDir) } - @Test func testUnpackBasic() throws { + @Test func testUnpackBasic() async throws { + try await unpackLayers() let ext4 = try EXT4.EXT4Reader(blockDevice: fsPath) // just a directory let dir1Inode = try ext4.getInode(number: 12) @@ -130,6 +143,218 @@ struct Tar2EXT4Test: ~Copyable { } } +/// Collects progress events in a thread-safe manner. +private actor ProgressCollector { + var events: [ProgressEvent] = [] + + func append(_ newEvents: [ProgressEvent]) { + events.append(contentsOf: newEvents) + } + + func allEvents() -> [ProgressEvent] { + events + } +} + +struct UnpackProgressTest { + @Test func progressReportsAccurateSizes() async throws { + // Create an archive with files of known sizes + let tempDir = FileManager.default.uniqueTemporaryDirectory() + let archivePath = tempDir.appendingPathComponent("test.tar.gz", isDirectory: false) + let fsPath = FilePath(tempDir.appendingPathComponent("test.ext4.img", isDirectory: false)) + + defer { + try? FileManager.default.removeItem(at: tempDir) + } + + // Create test data with specific sizes + let file1Data = Data(repeating: 0xAA, count: 1024) // 1 KiB + let file2Data = Data(repeating: 0xBB, count: 4096) // 4 KiB + let file3Data = Data(repeating: 0xCC, count: 512) // 512 bytes + let expectedTotalSize: Int64 = 1024 + 4096 + 512 // 5632 bytes + + // Build the archive + let archiver = try ArchiveWriter( + configuration: ArchiveWriterConfiguration(format: .paxRestricted, filter: .gzip)) + try archiver.open(file: archivePath) + + try archiver.writeEntry(entry: WriteEntry.dir(path: "/data", permissions: 0o755), data: nil) + try archiver.writeEntry( + entry: WriteEntry.file(path: "/data/file1.bin", permissions: 0o644, size: Int64(file1Data.count)), + data: file1Data) + try archiver.writeEntry( + entry: WriteEntry.file(path: "/data/file2.bin", permissions: 0o644, size: Int64(file2Data.count)), + data: file2Data) + try archiver.writeEntry( + entry: WriteEntry.file(path: "/data/file3.bin", permissions: 0o644, size: Int64(file3Data.count)), + data: file3Data) + // Include an empty file to verify it doesn't break size calculations + try archiver.writeEntry( + entry: WriteEntry.file(path: "/data/empty.bin", permissions: 0o644, size: 0), + data: Data()) + try archiver.finishEncoding() + + // Set up progress collection + let collector = ProgressCollector() + let shouldPrintProgress = ProcessInfo.processInfo.environment["PRINT_UNPACK_PROGRESS"] == "1" + let progressHandler: ProgressHandler = { events in + if shouldPrintProgress { + for event in events { + print("unpack-progress \(event.event): \(event.value)") + } + } + await collector.append(events) + } + + // Unpack with progress tracking + let formatter = try EXT4.Formatter(fsPath) + try await formatter.unpack(source: archivePath, progress: progressHandler) + try formatter.close() + + // Analyze collected events + let allEvents = await collector.allEvents() + + var reportedTotalSize: Int64 = 0 + var reportedTotalItems: Int = 0 + var cumulativeSize: Int64 = 0 + var itemCount: Int = 0 + + for event in allEvents { + switch event.event { + case "add-total-size": + let value = try #require(event.value as? Int64, "add-total-size value should be Int64") + reportedTotalSize += value + case "add-total-items": + let value = try #require(event.value as? Int, "add-total-items value should be Int") + reportedTotalItems += value + case "add-size": + let value = try #require(event.value as? Int64, "add-size value should be Int64") + cumulativeSize += value + case "add-items": + let value = try #require(event.value as? Int, "add-items value should be Int") + itemCount += value + default: + break + } + } + + // Verify the progress contract + let expectedTotalItems: Int = 5 // 1 dir + 4 files + #expect( + reportedTotalSize == expectedTotalSize, + "Total size should be \(expectedTotalSize) bytes, got \(reportedTotalSize)") + #expect( + reportedTotalItems == expectedTotalItems, + "Total items should be \(expectedTotalItems), got \(reportedTotalItems)") + #expect( + cumulativeSize == expectedTotalSize, + "Cumulative size should equal total size (\(expectedTotalSize)), got \(cumulativeSize)") + #expect( + itemCount == expectedTotalItems, + "Should have processed \(expectedTotalItems) entries (1 dir + 4 files), got \(itemCount)") + + // Verify incremental progress: we should get separate add-size events for each file + let addSizeEvents = allEvents.filter { $0.event == "add-size" } + #expect( + addSizeEvents.count == 4, + "Should have 4 add-size events (one per file, including empty), got \(addSizeEvents.count)") + + // Verify individual file sizes were reported correctly + let reportedSizes = addSizeEvents.compactMap { $0.value as? Int64 }.sorted() + #expect( + reportedSizes == [0, 512, 1024, 4096], + "Individual file sizes should be [0, 512, 1024, 4096], got \(reportedSizes)") + + // Verify event-by-event behavior expected by clients: + // total remains stable and written bytes are monotonic as progress updates arrive. + var runningTotal: Int64? + var runningWritten: Int64 = 0 + var previousSnapshot: (written: Int64, total: Int64?)? + var progressSnapshotCount = 0 + + for event in allEvents { + switch event.event { + case "add-total-size": + let value = try #require(event.value as? Int64, "add-total-size value should be Int64") + runningTotal = (runningTotal ?? 0) + value + case "add-size": + let value = try #require(event.value as? Int64, "add-size value should be Int64") + runningWritten += value + let currentSnapshot = (written: runningWritten, total: runningTotal) + if let previousSnapshot { + #expect( + currentSnapshot.written >= previousSnapshot.written, + "Written bytes should be monotonic: \(currentSnapshot.written) < \(previousSnapshot.written)") + #expect( + currentSnapshot.total == previousSnapshot.total, + "Total bytes should remain stable across progress updates") + } + previousSnapshot = currentSnapshot + progressSnapshotCount += 1 + default: + break + } + } + + #expect( + progressSnapshotCount == addSizeEvents.count, + "Should produce one monotonic snapshot per add-size update") + + // Verify totals come before incremental events (first pass before second pass) + let totalSizeIndex = try #require( + allEvents.firstIndex(where: { $0.event == "add-total-size" }), + "add-total-size event should be present") + let firstAddSizeIndex = try #require( + allEvents.firstIndex(where: { $0.event == "add-size" }), + "add-size event should be present") + #expect( + totalSizeIndex < firstAddSizeIndex, + "add-total-size should be reported before add-size events") + + let totalItemsIndex = try #require( + allEvents.firstIndex(where: { $0.event == "add-total-items" }), + "add-total-items event should be present") + let firstAddItemsIndex = try #require( + allEvents.firstIndex(where: { $0.event == "add-items" }), + "add-items event should be present") + #expect( + totalItemsIndex < firstAddItemsIndex, + "add-total-items should be reported before add-items events") + } + + @Test func progressHandlerIsOptional() async throws { + // Verify that unpacking works without a progress handler (existing behavior) + let tempDir = FileManager.default.uniqueTemporaryDirectory() + let archivePath = tempDir.appendingPathComponent("test.tar.gz", isDirectory: false) + let fsPath = FilePath(tempDir.appendingPathComponent("test.ext4.img", isDirectory: false)) + + defer { + try? FileManager.default.removeItem(at: tempDir) + } + + let archiver = try ArchiveWriter( + configuration: ArchiveWriterConfiguration(format: .paxRestricted, filter: .gzip)) + try archiver.open(file: archivePath) + try archiver.writeEntry(entry: WriteEntry.dir(path: "/test", permissions: 0o755), data: nil) + let data = Data(repeating: 0x42, count: 100) + try archiver.writeEntry( + entry: WriteEntry.file(path: "/test/file.bin", permissions: 0o644, size: Int64(data.count)), + data: data) + try archiver.finishEncoding() + + // Unpack without progress handler - should not throw + let formatter = try EXT4.Formatter(fsPath) + try await formatter.unpack(source: archivePath) + try formatter.close() + + // Verify the file was unpacked correctly + let reader = try EXT4.EXT4Reader(blockDevice: fsPath) + let children = try reader.children(of: EXT4.RootInode) + let childNames = Set(children.map { $0.0 }) + #expect(childNames.contains("test"), "Directory 'test' should exist in unpacked filesystem") + } +} + extension ContainerizationArchive.WriteEntry { static func dir(path: String, permissions: UInt16) -> WriteEntry { let entry = WriteEntry() diff --git a/Tests/ContainerizationTests/ImageTests/HeaderScanTimingTests.swift b/Tests/ContainerizationTests/ImageTests/HeaderScanTimingTests.swift new file mode 100644 index 00000000..1fa9d753 --- /dev/null +++ b/Tests/ContainerizationTests/ImageTests/HeaderScanTimingTests.swift @@ -0,0 +1,176 @@ +//===----------------------------------------------------------------------===// +// Copyright © 2026 Apple Inc. and the Containerization project authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//===----------------------------------------------------------------------===// + +#if os(macOS) +import ContainerizationArchive +import ContainerizationEXT4 +import ContainerizationExtras +import ContainerizationOCI +import Foundation +import SystemPackage +import Testing + +@testable import Containerization + +/// Measures header scan overhead vs. full unpack time using real container images +/// pulled from a registry. +/// +/// Run with: +/// ENABLE_TIMING_TESTS=1 swift test --filter ImageHeaderScanTimingTest +@Suite +struct ImageHeaderScanTimingTest { + private static let isEnabled = ProcessInfo.processInfo.environment["ENABLE_TIMING_TESTS"] != nil + + let store: ImageStore + let dir: URL + let contentStore: ContentStore + + init() throws { + let dir = FileManager.default.uniqueTemporaryDirectory(create: true) + let cs = try LocalContentStore(path: dir) + let store = try ImageStore(path: dir, contentStore: cs) + self.dir = dir + self.store = store + self.contentStore = cs + } + + @Test(.enabled(if: ImageHeaderScanTimingTest.isEnabled)) + func measureAlpineAndUbuntu() async throws { + defer { try? FileManager.default.removeItem(at: dir) } + + let images: [(reference: String, label: String)] = [ + ("ghcr.io/linuxcontainers/alpine:3.20", "Alpine 3.20"), + ("docker.io/library/ubuntu:24.04", "Ubuntu 24.04"), + ] + + for image in images { + print("\n==============================") + print("Image: \(image.label)") + print("==============================") + + let img = try await store.pull(reference: image.reference, platform: .current) + let manifest = try await img.manifest(for: .current) + + for (i, layer) in manifest.layers.enumerated() { + let content = try await img.getContent(digest: layer.digest) + let compression = compressionFilter(for: layer.mediaType) + let compressedSize = try FileManager.default.attributesOfItem(atPath: content.path.path)[.size] as? Int64 ?? 0 + let label = "\(image.label) layer \(i + 1)/\(manifest.layers.count) (\(layer.mediaType), \(formatBytes(compressedSize)) compressed)" + try await measureOverhead(url: content.path, compression: compression, label: label) + } + } + } + + // MARK: - Helpers + + private func compressionFilter(for mediaType: String) -> ContainerizationArchive.Filter { + switch mediaType { + case MediaTypes.imageLayerZstd, MediaTypes.dockerImageLayerZstd: + return .zstd + case MediaTypes.imageLayer, MediaTypes.dockerImageLayer: + return .none + default: + return .gzip + } + } + + private func measureOverhead(url: URL, compression: ContainerizationArchive.Filter, label: String) async throws { + let clock = ContinuousClock() + + print("\n--- \(label) ---\n") + + // For zstd, pre-decompress once (matching the production code path in EXT4Unpacker). + let scanFile: URL + let scanFilter: ContainerizationArchive.Filter + var decompressedFile: URL? + if compression == .zstd { + var decompressed: URL = url + let decompressDuration = try clock.measure { + decompressed = try ArchiveReader.decompressZstd(url) + } + scanFile = decompressed + scanFilter = .none + decompressedFile = decompressed + print(" Zstd decompress: \(decompressDuration)") + } else { + scanFile = url + scanFilter = compression + } + defer { + if let decompressedFile { + ArchiveReader.cleanUpDecompressedZstd(decompressedFile) + } + } + + // 1. Header scan only + var scannedTotals: (size: Int64, items: Int) = (0, 0) + let scanDuration = try clock.measure { + scannedTotals = try EXT4.Formatter.scanArchiveHeaders( + format: .paxRestricted, filter: scanFilter, file: scanFile) + } + print(" Scanned total size: \(formatBytes(scannedTotals.size)) (\(scannedTotals.items) items)") + print(" Header scan: \(scanDuration)") + + // 2. Full unpack without progress + let tempDir1 = FileManager.default.uniqueTemporaryDirectory() + let fsPath1 = FilePath(tempDir1.appendingPathComponent("no-progress.ext4.img", isDirectory: false)) + defer { try? FileManager.default.removeItem(at: tempDir1) } + + let unpackOnlyDuration = try await clock.measure { + let formatter = try EXT4.Formatter(fsPath1) + try await formatter.unpack(source: url, compression: compression) + try formatter.close() + } + print(" Unpack (no progress): \(unpackOnlyDuration)") + + // 3. Full unpack with progress (includes header scan pass) + let tempDir2 = FileManager.default.uniqueTemporaryDirectory() + let fsPath2 = FilePath(tempDir2.appendingPathComponent("with-progress.ext4.img", isDirectory: false)) + defer { try? FileManager.default.removeItem(at: tempDir2) } + + let noopProgress: ProgressHandler = { _ in } + let withProgressDuration = try await clock.measure { + let formatter = try EXT4.Formatter(fsPath2) + try await formatter.unpack(source: url, compression: compression, progress: noopProgress) + try formatter.close() + } + print(" Unpack (w/ progress): \(withProgressDuration)") + + // Summary + let scanMs = toMs(scanDuration) + let unpackMs = toMs(unpackOnlyDuration) + let withProgressMs = toMs(withProgressDuration) + let overheadMs = withProgressMs - unpackMs + let overheadPct = unpackMs > 0 ? (overheadMs / unpackMs) * 100 : 0 + + print("\n Summary:") + print(" Header scan alone: \(String(format: "%.1f", scanMs)) ms") + print(" Unpack only: \(String(format: "%.1f", unpackMs)) ms") + print(" Unpack + progress: \(String(format: "%.1f", withProgressMs)) ms") + print(" Overhead: \(String(format: "%.1f", overheadMs)) ms (\(String(format: "%.1f", overheadPct))%)") + } + + private func toMs(_ d: Duration) -> Double { + let c = d.components + return Double(c.seconds) * 1000.0 + Double(c.attoseconds) / 1e15 + } + + private func formatBytes(_ bytes: Int64) -> String { + let mb = Double(bytes) / 1_048_576.0 + return "\(String(format: "%.1f", mb)) MB" + } +} +#endif