From 4ded42151bdbd70f60645e62d738e39449be6250 Mon Sep 17 00:00:00 2001 From: Aditya Ramani Date: Thu, 2 Apr 2026 17:20:00 -0700 Subject: [PATCH 1/3] Write data in chunks in ContentWriter --- .../Content/ContentWriter.swift | 38 +++++- .../ContentWriterTests.swift | 119 ++++++++++++++++++ 2 files changed, 155 insertions(+), 2 deletions(-) create mode 100644 Tests/ContainerizationOCITests/ContentWriterTests.swift diff --git a/Sources/ContainerizationOCI/Content/ContentWriter.swift b/Sources/ContainerizationOCI/Content/ContentWriter.swift index 0b8c662c..e4360ea9 100644 --- a/Sources/ContainerizationOCI/Content/ContentWriter.swift +++ b/Sources/ContainerizationOCI/Content/ContentWriter.swift @@ -56,8 +56,42 @@ public class ContentWriter { /// - url: The URL to read the data from. @discardableResult public func create(from url: URL) throws -> (size: Int64, digest: SHA256.Digest) { - let data = try Data(contentsOf: url) - return try self.write(data) + let source = try FileHandle(forReadingFrom: url) + defer { try? source.close() } + let tempURL = base.appendingPathComponent(UUID().uuidString) + guard FileManager.default.createFile(atPath: tempURL.path, contents: nil) else { + throw ContainerizationError(.internalError, message: "failed to create temporary file at \(tempURL.absolutePath())") + } + let dest = try FileHandle(forWritingTo: tempURL) + var hasher = SHA256() + var totalSize: Int64 = 0 + let chunkSize = 1024 * 1024 // 1 MiB + do { + while let chunk = try source.read(upToCount: chunkSize), !chunk.isEmpty { + hasher.update(data: chunk) + try dest.write(contentsOf: chunk) + totalSize += Int64(chunk.count) + } + try dest.close() + } catch { + try? dest.close() + try? FileManager.default.removeItem(at: tempURL) + throw error + } + let digest = hasher.finalize() + let destination = base.appendingPathComponent(digest.encoded) + do { + try FileManager.default.moveItem(at: tempURL, to: destination) + } catch let error as NSError { + guard error.code == NSFileWriteFileExistsError else { + throw error + } + try? FileManager.default.removeItem(at: tempURL) + } catch { + try? FileManager.default.removeItem(at: tempURL) + throw error + } + return (totalSize, digest) } /// Encodes the passed in type as a JSON blob and writes it to the base path. diff --git a/Tests/ContainerizationOCITests/ContentWriterTests.swift b/Tests/ContainerizationOCITests/ContentWriterTests.swift new file mode 100644 index 00000000..a13e28a6 --- /dev/null +++ b/Tests/ContainerizationOCITests/ContentWriterTests.swift @@ -0,0 +1,119 @@ +//===----------------------------------------------------------------------===// +// Copyright © 2026 Apple Inc. and the Containerization project authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//===----------------------------------------------------------------------===// + +import ContainerizationError +import Crypto +import Foundation +import Testing + +@testable import ContainerizationOCI + +@Suite("ContentWriter Tests") +struct ContentWriterTests { + private func withTempDirectory(_ body: (URL) throws -> Void) throws { + let dir = FileManager.default.temporaryDirectory + .appendingPathComponent(UUID().uuidString) + try FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true) + defer { try? FileManager.default.removeItem(at: dir) } + try body(dir) + } + + private func makeTempFile(in dir: URL, data: Data) throws -> URL { + let url = dir.appendingPathComponent(UUID().uuidString) + try data.write(to: url) + return url + } + + @Test func testWriteReturnsCorrectSizeAndDigest() throws { + try withTempDirectory { dir in + let writer = try ContentWriter(for: dir) + let data = Data("test content".utf8) + let (size, digest) = try writer.write(data) + let expected = SHA256.hash(data: data) + let destination = dir.appendingPathComponent(digest.encoded) + #expect(size == Int64(data.count)) + #expect(digest == expected) + #expect(FileManager.default.fileExists(atPath: destination.path)) + } + } + + @Test func testWriteDuplicateData() throws { + try withTempDirectory { dir in + let writer = try ContentWriter(for: dir) + let data = Data("duplicate".utf8) + #expect(throws: Never.self) { + try writer.write(data) + try writer.write(data) + } + } + } + + @Test func testCreateFromFileSmallFile() throws { + try withTempDirectory { base in + try withTempDirectory { src in + let data = Data("small file contents".utf8) + let sourceURL = try makeTempFile(in: src, data: data) + let writer = try ContentWriter(for: base) + let (size, digest) = try writer.create(from: sourceURL) + let destination = base.appendingPathComponent(digest.encoded) + let written = try Data(contentsOf: destination) + #expect(size == Int64(data.count)) + #expect(digest == SHA256.hash(data: data)) + #expect(FileManager.default.fileExists(atPath: destination.path)) + #expect(written == data) + } + } + } + + @Test func testCreateFromFileLargeFileDuplicates() throws { + try withTempDirectory { base in + try withTempDirectory { src in + let count = 3 * 1024 * 1024 + 100 + var bytes = [UInt8](repeating: 0, count: count) + arc4random_buf(&bytes, count) + let data = Data(bytes) + let sourceURL = try makeTempFile(in: src, data: data) + let writer = try ContentWriter(for: base) + let (size, digest) = try writer.create(from: sourceURL) + let destination = base.appendingPathComponent(digest.encoded) + #expect(size == Int64(data.count)) + #expect(digest == SHA256.hash(data: data)) + #expect(throws: Never.self) { + try writer.create(from: sourceURL) + } + #expect(FileManager.default.fileExists(atPath: destination.path)) + } + } + } + + private struct SamplePayload: Codable, Equatable { + let name: String + let value: Int + } + + @Test func testCreateFromEncodableReturnsCorrectDigest() throws { + try withTempDirectory { base in + let writer = try ContentWriter(for: base) + let payload = SamplePayload(name: "digest-check", value: 99) + let (size, digest) = try writer.create(from: payload) + let encoder = JSONEncoder() + encoder.outputFormatting = [.sortedKeys] + let expected = try encoder.encode(payload) + #expect(size == Int64(expected.count)) + #expect(digest == SHA256.hash(data: expected)) + } + } +} From 4bdbe47bb9a33389761577ed3b88baf376532e06 Mon Sep 17 00:00:00 2001 From: Aditya Ramani Date: Fri, 3 Apr 2026 08:25:36 -0700 Subject: [PATCH 2/3] Avoid using FileHandle --- .../Content/ContentWriter.swift | 60 ++++++++++++++----- 1 file changed, 44 insertions(+), 16 deletions(-) diff --git a/Sources/ContainerizationOCI/Content/ContentWriter.swift b/Sources/ContainerizationOCI/Content/ContentWriter.swift index e4360ea9..825d4487 100644 --- a/Sources/ContainerizationOCI/Content/ContentWriter.swift +++ b/Sources/ContainerizationOCI/Content/ContentWriter.swift @@ -56,28 +56,56 @@ public class ContentWriter { /// - url: The URL to read the data from. @discardableResult public func create(from url: URL) throws -> (size: Int64, digest: SHA256.Digest) { - let source = try FileHandle(forReadingFrom: url) - defer { try? source.close() } + let sourceFD = Foundation.open(url.path, O_RDONLY) + guard sourceFD >= 0 else { + let err = POSIXErrorCode(rawValue: errno) ?? .EINVAL + throw ContainerizationError(.internalError, message: "failed to open \(url.path) for reading: \(err)") + } + defer { close(sourceFD) } + let tempURL = base.appendingPathComponent(UUID().uuidString) - guard FileManager.default.createFile(atPath: tempURL.path, contents: nil) else { - throw ContainerizationError(.internalError, message: "failed to create temporary file at \(tempURL.absolutePath())") + let destFD = Foundation.open(tempURL.path, O_WRONLY | O_CREAT | O_TRUNC, 0o644) + guard destFD >= 0 else { + let err = POSIXErrorCode(rawValue: errno) ?? .EINVAL + throw ContainerizationError(.internalError, message: "failed to create temporary file at \(tempURL.absolutePath()): \(err)") } - let dest = try FileHandle(forWritingTo: tempURL) + + let chunkSize = 1024 * 1024 // 1 MiB + let buf = UnsafeMutableRawBufferPointer.allocate(byteCount: chunkSize, alignment: 1) + defer { buf.deallocate() } + guard let baseAddress = buf.baseAddress else { + close(destFD) + try? FileManager.default.removeItem(at: tempURL) + throw ContainerizationError(.internalError, message: "failed to allocate read buffer of size \(chunkSize)") + } + var hasher = SHA256() var totalSize: Int64 = 0 - let chunkSize = 1024 * 1024 // 1 MiB - do { - while let chunk = try source.read(upToCount: chunkSize), !chunk.isEmpty { - hasher.update(data: chunk) - try dest.write(contentsOf: chunk) - totalSize += Int64(chunk.count) + while true { + let n = read(sourceFD, baseAddress, chunkSize) + if n == 0 { break } + if n < 0 { + let err = POSIXErrorCode(rawValue: errno) ?? .EINVAL + close(destFD) + try? FileManager.default.removeItem(at: tempURL) + throw ContainerizationError(.internalError, message: "failed to read from \(url.path): \(err)") } - try dest.close() - } catch { - try? dest.close() - try? FileManager.default.removeItem(at: tempURL) - throw error + hasher.update(data: UnsafeRawBufferPointer(start: baseAddress, count: n)) + var written = 0 + while written < n { + let w = Foundation.write(destFD, baseAddress.advanced(by: written), n - written) + if w < 0 { + let err = POSIXErrorCode(rawValue: errno) ?? .EINVAL + close(destFD) + try? FileManager.default.removeItem(at: tempURL) + throw ContainerizationError(.internalError, message: "failed to write to \(tempURL.absolutePath()): \(err)") + } + written += w + } + totalSize += Int64(n) } + close(destFD) + let digest = hasher.finalize() let destination = base.appendingPathComponent(digest.encoded) do { From 5756e1295c9e08a0c23870f54873d1773da3ac68 Mon Sep 17 00:00:00 2001 From: Aditya Ramani Date: Fri, 3 Apr 2026 10:10:49 -0700 Subject: [PATCH 3/3] set CZError cause --- .../Content/ContentWriter.swift | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/Sources/ContainerizationOCI/Content/ContentWriter.swift b/Sources/ContainerizationOCI/Content/ContentWriter.swift index 825d4487..87b4c387 100644 --- a/Sources/ContainerizationOCI/Content/ContentWriter.swift +++ b/Sources/ContainerizationOCI/Content/ContentWriter.swift @@ -58,16 +58,18 @@ public class ContentWriter { public func create(from url: URL) throws -> (size: Int64, digest: SHA256.Digest) { let sourceFD = Foundation.open(url.path, O_RDONLY) guard sourceFD >= 0 else { - let err = POSIXErrorCode(rawValue: errno) ?? .EINVAL - throw ContainerizationError(.internalError, message: "failed to open \(url.path) for reading: \(err)") + let errCode = POSIXErrorCode(rawValue: errno) ?? .EINVAL + let err = POSIXError(errCode) + throw ContainerizationError(.internalError, message: "failed to open \(url.path) for reading", cause: err) } defer { close(sourceFD) } let tempURL = base.appendingPathComponent(UUID().uuidString) let destFD = Foundation.open(tempURL.path, O_WRONLY | O_CREAT | O_TRUNC, 0o644) guard destFD >= 0 else { - let err = POSIXErrorCode(rawValue: errno) ?? .EINVAL - throw ContainerizationError(.internalError, message: "failed to create temporary file at \(tempURL.absolutePath()): \(err)") + let errCode = POSIXErrorCode(rawValue: errno) ?? .EINVAL + let err = POSIXError(errCode) + throw ContainerizationError(.internalError, message: "failed to create temporary file at \(tempURL.absolutePath())", cause: err) } let chunkSize = 1024 * 1024 // 1 MiB @@ -85,20 +87,22 @@ public class ContentWriter { let n = read(sourceFD, baseAddress, chunkSize) if n == 0 { break } if n < 0 { - let err = POSIXErrorCode(rawValue: errno) ?? .EINVAL close(destFD) + let errCode = POSIXErrorCode(rawValue: errno) ?? .EINVAL + let err = POSIXError(errCode) try? FileManager.default.removeItem(at: tempURL) - throw ContainerizationError(.internalError, message: "failed to read from \(url.path): \(err)") + throw ContainerizationError(.internalError, message: "failed to read from \(url.path)", cause: err) } hasher.update(data: UnsafeRawBufferPointer(start: baseAddress, count: n)) var written = 0 while written < n { let w = Foundation.write(destFD, baseAddress.advanced(by: written), n - written) if w < 0 { - let err = POSIXErrorCode(rawValue: errno) ?? .EINVAL close(destFD) + let errCode = POSIXErrorCode(rawValue: errno) ?? .EINVAL + let err = POSIXError(errCode) try? FileManager.default.removeItem(at: tempURL) - throw ContainerizationError(.internalError, message: "failed to write to \(tempURL.absolutePath()): \(err)") + throw ContainerizationError(.internalError, message: "failed to write to \(tempURL.absolutePath())", cause: err) } written += w }