diff --git a/src/iceberg/json_serde.cc b/src/iceberg/json_serde.cc index 93705f538..004e8b3f5 100644 --- a/src/iceberg/json_serde.cc +++ b/src/iceberg/json_serde.cc @@ -26,6 +26,7 @@ #include +#include "iceberg/constants.h" #include "iceberg/json_serde_internal.h" #include "iceberg/name_mapping.h" #include "iceberg/partition_field.h" diff --git a/src/iceberg/manifest/manifest_writer.cc b/src/iceberg/manifest/manifest_writer.cc index 36b34b8bc..fbe3d79bc 100644 --- a/src/iceberg/manifest/manifest_writer.cc +++ b/src/iceberg/manifest/manifest_writer.cc @@ -30,7 +30,6 @@ #include "iceberg/partition_summary_internal.h" #include "iceberg/result.h" #include "iceberg/schema.h" -#include "iceberg/snapshot.h" #include "iceberg/table_metadata.h" #include "iceberg/util/macros.h" @@ -272,87 +271,41 @@ Result> OpenFileWriter( return writer; } -Result> ManifestWriter::MakeV1Writer( - std::optional snapshot_id, std::string_view manifest_location, - std::shared_ptr file_io, std::shared_ptr partition_spec, - std::shared_ptr current_schema) { - if (manifest_location.empty()) { - return InvalidArgument("Manifest location cannot be empty"); - } - if (!file_io) { - return InvalidArgument("FileIO cannot be null"); - } - if (!partition_spec) { - return InvalidArgument("PartitionSpec cannot be null"); - } - if (!current_schema) { - return InvalidArgument("Current schema cannot be null"); - } +Result> ManifestWriter::MakeWriter( + int8_t format_version, std::optional snapshot_id, + std::string_view manifest_location, std::shared_ptr file_io, + std::shared_ptr partition_spec, std::shared_ptr current_schema, + ManifestContent content, std::optional first_row_id) { + ICEBERG_PRECHECK(!manifest_location.empty(), "Manifest location cannot be empty"); + ICEBERG_PRECHECK(file_io, "FileIO cannot be null"); + ICEBERG_PRECHECK(partition_spec, "PartitionSpec cannot be null"); + ICEBERG_PRECHECK(current_schema, "Current schema cannot be null"); - auto adapter = std::make_unique( - snapshot_id, std::move(partition_spec), std::move(current_schema)); - ICEBERG_RETURN_UNEXPECTED(adapter->Init()); - ICEBERG_RETURN_UNEXPECTED(adapter->StartAppending()); + std::unique_ptr adapter; + std::optional writer_first_row_id = std::nullopt; - auto schema = adapter->schema(); - ICEBERG_ASSIGN_OR_RAISE( - auto writer, - OpenFileWriter(manifest_location, std::move(schema), std::move(file_io), - adapter->metadata(), "manifest_entry")); - return std::unique_ptr(new ManifestWriter( - std::move(writer), std::move(adapter), manifest_location, std::nullopt)); -} - -Result> ManifestWriter::MakeV2Writer( - std::optional snapshot_id, std::string_view manifest_location, - std::shared_ptr file_io, std::shared_ptr partition_spec, - std::shared_ptr current_schema, ManifestContent content) { - if (manifest_location.empty()) { - return InvalidArgument("Manifest location cannot be empty"); - } - if (!file_io) { - return InvalidArgument("FileIO cannot be null"); - } - if (!partition_spec) { - return InvalidArgument("PartitionSpec cannot be null"); - } - if (!current_schema) { - return InvalidArgument("Current schema cannot be null"); + switch (format_version) { + case 1: { + adapter = std::make_unique( + snapshot_id, std::move(partition_spec), std::move(current_schema)); + break; + } + case 2: { + adapter = std::make_unique( + snapshot_id, std::move(partition_spec), std::move(current_schema), content); + break; + } + case 3: { + adapter = std::make_unique( + snapshot_id, first_row_id, std::move(partition_spec), std::move(current_schema), + content); + writer_first_row_id = first_row_id; + break; + } + default: + return NotSupported("Format version {} is not supported", format_version); } - auto adapter = std::make_unique( - snapshot_id, std::move(partition_spec), std::move(current_schema), content); - ICEBERG_RETURN_UNEXPECTED(adapter->Init()); - ICEBERG_RETURN_UNEXPECTED(adapter->StartAppending()); - - auto schema = adapter->schema(); - ICEBERG_ASSIGN_OR_RAISE( - auto writer, - OpenFileWriter(manifest_location, std::move(schema), std::move(file_io), - adapter->metadata(), "manifest_entry")); - return std::unique_ptr(new ManifestWriter( - std::move(writer), std::move(adapter), manifest_location, std::nullopt)); -} -Result> ManifestWriter::MakeV3Writer( - std::optional snapshot_id, std::optional first_row_id, - std::string_view manifest_location, std::shared_ptr file_io, - std::shared_ptr partition_spec, std::shared_ptr current_schema, - ManifestContent content) { - if (manifest_location.empty()) { - return InvalidArgument("Manifest location cannot be empty"); - } - if (!file_io) { - return InvalidArgument("FileIO cannot be null"); - } - if (!partition_spec) { - return InvalidArgument("PartitionSpec cannot be null"); - } - if (!current_schema) { - return InvalidArgument("Current schema cannot be null"); - } - auto adapter = std::make_unique( - snapshot_id, first_row_id, std::move(partition_spec), std::move(current_schema), - content); ICEBERG_RETURN_UNEXPECTED(adapter->Init()); ICEBERG_RETURN_UNEXPECTED(adapter->StartAppending()); @@ -362,28 +315,7 @@ Result> ManifestWriter::MakeV3Writer( OpenFileWriter(manifest_location, std::move(schema), std::move(file_io), adapter->metadata(), "manifest_entry")); return std::unique_ptr(new ManifestWriter( - std::move(writer), std::move(adapter), manifest_location, first_row_id)); -} - -Result> ManifestWriter::MakeWriter( - int8_t format_version, std::optional snapshot_id, - std::string_view manifest_location, std::shared_ptr file_io, - std::shared_ptr partition_spec, std::shared_ptr current_schema, - ManifestContent content, std::optional first_row_id) { - switch (format_version) { - case 1: - return MakeV1Writer(snapshot_id, manifest_location, std::move(file_io), - std::move(partition_spec), std::move(current_schema)); - case 2: - return MakeV2Writer(snapshot_id, manifest_location, std::move(file_io), - std::move(partition_spec), std::move(current_schema), content); - case 3: - return MakeV3Writer(snapshot_id, first_row_id, manifest_location, - std::move(file_io), std::move(partition_spec), - std::move(current_schema), content); - default: - return NotSupported("Format version {} is not supported", format_version); - } + std::move(writer), std::move(adapter), manifest_location, writer_first_row_id)); } ManifestListWriter::ManifestListWriter(std::unique_ptr writer, @@ -420,83 +352,47 @@ std::optional ManifestListWriter::next_row_id() const { return adapter_->next_row_id(); } -Result> ManifestListWriter::MakeV1Writer( - int64_t snapshot_id, std::optional parent_snapshot_id, - std::string_view manifest_list_location, std::shared_ptr file_io) { - auto adapter = std::make_unique(snapshot_id, parent_snapshot_id); - ICEBERG_RETURN_UNEXPECTED(adapter->Init()); - ICEBERG_RETURN_UNEXPECTED(adapter->StartAppending()); - - auto schema = adapter->schema(); - ICEBERG_ASSIGN_OR_RAISE( - auto writer, - OpenFileWriter(manifest_list_location, std::move(schema), std::move(file_io), - adapter->metadata(), "manifest_file")); - return std::unique_ptr( - new ManifestListWriter(std::move(writer), std::move(adapter))); -} - -Result> ManifestListWriter::MakeV2Writer( - int64_t snapshot_id, std::optional parent_snapshot_id, - int64_t sequence_number, std::string_view manifest_list_location, - std::shared_ptr file_io) { - auto adapter = std::make_unique(snapshot_id, parent_snapshot_id, - sequence_number); - ICEBERG_RETURN_UNEXPECTED(adapter->Init()); - ICEBERG_RETURN_UNEXPECTED(adapter->StartAppending()); - - auto schema = adapter->schema(); - ICEBERG_ASSIGN_OR_RAISE( - auto writer, - OpenFileWriter(manifest_list_location, std::move(schema), std::move(file_io), - adapter->metadata(), "manifest_file")); - - return std::unique_ptr( - new ManifestListWriter(std::move(writer), std::move(adapter))); -} - -Result> ManifestListWriter::MakeV3Writer( - int64_t snapshot_id, std::optional parent_snapshot_id, - int64_t sequence_number, int64_t first_row_id, - std::string_view manifest_list_location, std::shared_ptr file_io) { - auto adapter = std::make_unique(snapshot_id, parent_snapshot_id, - sequence_number, first_row_id); - ICEBERG_RETURN_UNEXPECTED(adapter->Init()); - ICEBERG_RETURN_UNEXPECTED(adapter->StartAppending()); - - auto schema = adapter->schema(); - ICEBERG_ASSIGN_OR_RAISE( - auto writer, - OpenFileWriter(manifest_list_location, std::move(schema), std::move(file_io), - adapter->metadata(), "manifest_file")); - return std::unique_ptr( - new ManifestListWriter(std::move(writer), std::move(adapter))); -} - Result> ManifestListWriter::MakeWriter( int8_t format_version, int64_t snapshot_id, std::optional parent_snapshot_id, std::string_view manifest_list_location, std::shared_ptr file_io, std::optional sequence_number, std::optional first_row_id) { + std::unique_ptr adapter; + switch (format_version) { - case 1: - return MakeV1Writer(snapshot_id, parent_snapshot_id, manifest_list_location, - std::move(file_io)); - case 2: + case 1: { + adapter = std::make_unique(snapshot_id, parent_snapshot_id); + break; + } + case 2: { ICEBERG_PRECHECK(sequence_number.has_value(), "Sequence number is required for format version 2"); - return MakeV2Writer(snapshot_id, parent_snapshot_id, sequence_number.value(), - manifest_list_location, std::move(file_io)); - case 3: + adapter = std::make_unique(snapshot_id, parent_snapshot_id, + sequence_number.value()); + break; + } + case 3: { ICEBERG_PRECHECK(sequence_number.has_value(), "Sequence number is required for format version 3"); ICEBERG_PRECHECK(first_row_id.has_value(), "First row ID is required for format version 3"); - return MakeV3Writer(snapshot_id, parent_snapshot_id, sequence_number.value(), - first_row_id.value(), manifest_list_location, - std::move(file_io)); + adapter = std::make_unique( + snapshot_id, parent_snapshot_id, sequence_number.value(), first_row_id.value()); + break; + } default: return NotSupported("Format version {} is not supported", format_version); } + + ICEBERG_RETURN_UNEXPECTED(adapter->Init()); + ICEBERG_RETURN_UNEXPECTED(adapter->StartAppending()); + + auto schema = adapter->schema(); + ICEBERG_ASSIGN_OR_RAISE( + auto writer, + OpenFileWriter(manifest_list_location, std::move(schema), std::move(file_io), + adapter->metadata(), "manifest_file")); + return std::unique_ptr( + new ManifestListWriter(std::move(writer), std::move(adapter))); } } // namespace iceberg diff --git a/src/iceberg/manifest/manifest_writer.h b/src/iceberg/manifest/manifest_writer.h index 288bda31a..cc57f25fc 100644 --- a/src/iceberg/manifest/manifest_writer.h +++ b/src/iceberg/manifest/manifest_writer.h @@ -117,48 +117,6 @@ class ICEBERG_EXPORT ManifestWriter { /// \note Only valid after the file is closed. Result ToManifestFile() const; - /// \brief Creates a writer for a manifest file. - /// \param snapshot_id ID of the snapshot. - /// \param manifest_location Path to the manifest file. - /// \param file_io File IO implementation to use. - /// \param partition_spec Partition spec for the manifest. - /// \param current_schema Current table schema. - /// \return A Result containing the writer or an error. - static Result> MakeV1Writer( - std::optional snapshot_id, std::string_view manifest_location, - std::shared_ptr file_io, std::shared_ptr partition_spec, - std::shared_ptr current_schema); - - /// \brief Creates a writer for a manifest file. - /// \param snapshot_id ID of the snapshot. - /// \param manifest_location Path to the manifest file. - /// \param file_io File IO implementation to use. - /// \param partition_spec Partition spec for the manifest. - /// \param current_schema Schema containing the source fields referenced by partition - /// spec. - /// \param content Content of the manifest. - /// \return A Result containing the writer or an error. - static Result> MakeV2Writer( - std::optional snapshot_id, std::string_view manifest_location, - std::shared_ptr file_io, std::shared_ptr partition_spec, - std::shared_ptr current_schema, ManifestContent content); - - /// \brief Creates a writer for a manifest file. - /// \param snapshot_id ID of the snapshot. - /// \param first_row_id First row ID of the snapshot. - /// \param manifest_location Path to the manifest file. - /// \param file_io File IO implementation to use. - /// \param partition_spec Partition spec for the manifest. - /// \param current_schema Schema containing the source fields referenced by partition - /// spec. - /// \param content Content of the manifest. - /// \return A Result containing the writer or an error. - static Result> MakeV3Writer( - std::optional snapshot_id, std::optional first_row_id, - std::string_view manifest_location, std::shared_ptr file_io, - std::shared_ptr partition_spec, - std::shared_ptr current_schema, ManifestContent content); - /// \brief Factory function to create a writer for a manifest file based on format /// version. /// \param format_version The format version (1, 2, 3, etc.). @@ -226,41 +184,6 @@ class ICEBERG_EXPORT ManifestListWriter { /// \brief Get the next row id to assign. std::optional next_row_id() const; - /// \brief Creates a writer for the v1 manifest list. - /// \param snapshot_id ID of the snapshot. - /// \param parent_snapshot_id ID of the parent snapshot. - /// \param manifest_list_location Path to the manifest list file. - /// \param file_io File IO implementation to use. - /// \return A Result containing the writer or an error. - static Result> MakeV1Writer( - int64_t snapshot_id, std::optional parent_snapshot_id, - std::string_view manifest_list_location, std::shared_ptr file_io); - - /// \brief Creates a writer for the manifest list. - /// \param snapshot_id ID of the snapshot. - /// \param parent_snapshot_id ID of the parent snapshot. - /// \param sequence_number Sequence number of the snapshot. - /// \param manifest_list_location Path to the manifest list file. - /// \param file_io File IO implementation to use. - /// \return A Result containing the writer or an error. - static Result> MakeV2Writer( - int64_t snapshot_id, std::optional parent_snapshot_id, - int64_t sequence_number, std::string_view manifest_list_location, - std::shared_ptr file_io); - - /// \brief Creates a writer for the manifest list. - /// \param snapshot_id ID of the snapshot. - /// \param parent_snapshot_id ID of the parent snapshot. - /// \param sequence_number Sequence number of the snapshot. - /// \param first_row_id First row ID of the snapshot. - /// \param manifest_list_location Path to the manifest list file. - /// \param file_io File IO implementation to use. - /// \return A Result containing the writer or an error. - static Result> MakeV3Writer( - int64_t snapshot_id, std::optional parent_snapshot_id, - int64_t sequence_number, int64_t first_row_id, - std::string_view manifest_list_location, std::shared_ptr file_io); - /// \brief Factory function to create a writer for the manifest list based on format /// version. /// \param format_version The format version (1, 2, 3, etc.). diff --git a/src/iceberg/test/delete_file_index_test.cc b/src/iceberg/test/delete_file_index_test.cc index d5866e27c..b99a2816b 100644 --- a/src/iceberg/test/delete_file_index_test.cc +++ b/src/iceberg/test/delete_file_index_test.cc @@ -44,7 +44,7 @@ namespace iceberg { -class DeleteFileIndexTest : public testing::TestWithParam { +class DeleteFileIndexTest : public testing::TestWithParam { protected: void SetUp() override { avro::RegisterAll(); @@ -160,7 +160,7 @@ class DeleteFileIndexTest : public testing::TestWithParam { }; } - ManifestFile WriteDeleteManifest(int format_version, int64_t snapshot_id, + ManifestFile WriteDeleteManifest(int8_t format_version, int64_t snapshot_id, std::vector entries, std::shared_ptr spec) { const std::string manifest_path = MakeManifestPath(); @@ -230,7 +230,7 @@ TEST_P(DeleteFileIndexTest, TestEmptyIndex) { } TEST_P(DeleteFileIndexTest, TestMinSequenceNumberFilteringForFiles) { - int version = GetParam(); + auto version = GetParam(); auto eq_delete_1 = MakeEqualityDeleteFile("/path/to/eq-delete-1.parquet", PartitionValues(std::vector{}), @@ -261,7 +261,7 @@ TEST_P(DeleteFileIndexTest, TestMinSequenceNumberFilteringForFiles) { } TEST_P(DeleteFileIndexTest, TestUnpartitionedDeletes) { - int version = GetParam(); + auto version = GetParam(); auto eq_delete_1 = MakeEqualityDeleteFile("/path/to/eq-delete-1.parquet", PartitionValues(std::vector{}), @@ -359,7 +359,7 @@ TEST_P(DeleteFileIndexTest, TestUnpartitionedDeletes) { } TEST_P(DeleteFileIndexTest, TestPartitionedDeleteIndex) { - int version = GetParam(); + auto version = GetParam(); auto partition_a = PartitionValues({Literal::Int(0)}); auto eq_delete_1 = MakeEqualityDeleteFile("/path/to/eq-delete-1.parquet", partition_a, @@ -462,7 +462,7 @@ TEST_P(DeleteFileIndexTest, TestPartitionedDeleteIndex) { } TEST_P(DeleteFileIndexTest, TestPartitionedTableWithPartitionPosDeletes) { - int version = GetParam(); + auto version = GetParam(); auto partition_a = PartitionValues({Literal::Int(0)}); auto pos_delete = MakePositionDeleteFile("/path/to/pos-delete.parquet", partition_a, @@ -487,7 +487,7 @@ TEST_P(DeleteFileIndexTest, TestPartitionedTableWithPartitionPosDeletes) { } TEST_P(DeleteFileIndexTest, TestPartitionedTableWithPartitionEqDeletes) { - int version = GetParam(); + auto version = GetParam(); auto partition_a = PartitionValues({Literal::Int(0)}); auto eq_delete = MakeEqualityDeleteFile("/path/to/eq-delete.parquet", partition_a, @@ -512,7 +512,7 @@ TEST_P(DeleteFileIndexTest, TestPartitionedTableWithPartitionEqDeletes) { } TEST_P(DeleteFileIndexTest, TestPartitionedTableWithUnrelatedPartitionDeletes) { - int version = GetParam(); + auto version = GetParam(); // Create deletes for partition A auto partition_a = PartitionValues({Literal::Int(0)}); @@ -538,7 +538,7 @@ TEST_P(DeleteFileIndexTest, TestPartitionedTableWithUnrelatedPartitionDeletes) { } TEST_P(DeleteFileIndexTest, TestPartitionedTableWithOlderPartitionDeletes) { - int version = GetParam(); + auto version = GetParam(); if (version >= 3) { GTEST_SKIP() << "DVs are not filtered using sequence numbers in V3+"; } @@ -568,7 +568,7 @@ TEST_P(DeleteFileIndexTest, TestPartitionedTableWithOlderPartitionDeletes) { } TEST_P(DeleteFileIndexTest, TestPartitionedTableScanWithGlobalDeletes) { - int version = GetParam(); + auto version = GetParam(); if (version >= 3) { GTEST_SKIP() << "Different behavior for position deletes in V3"; } @@ -599,7 +599,7 @@ TEST_P(DeleteFileIndexTest, TestPartitionedTableScanWithGlobalDeletes) { } TEST_P(DeleteFileIndexTest, TestPartitionedTableScanWithGlobalAndPartitionDeletes) { - int version = GetParam(); + auto version = GetParam(); if (version >= 3) { GTEST_SKIP() << "Different behavior for position deletes in V3"; } @@ -644,7 +644,7 @@ TEST_P(DeleteFileIndexTest, TestPartitionedTableScanWithGlobalAndPartitionDelete } TEST_P(DeleteFileIndexTest, TestPartitionedTableSequenceNumbers) { - int version = GetParam(); + auto version = GetParam(); auto partition_a = PartitionValues({Literal::Int(0)}); auto eq_delete = MakeEqualityDeleteFile("/path/to/eq-delete.parquet", partition_a, @@ -672,7 +672,7 @@ TEST_P(DeleteFileIndexTest, TestPartitionedTableSequenceNumbers) { } TEST_P(DeleteFileIndexTest, TestUnpartitionedTableSequenceNumbers) { - int version = GetParam(); + auto version = GetParam(); if (version >= 3) { GTEST_SKIP() << "Different behavior in V3"; } @@ -841,7 +841,7 @@ TEST_P(DeleteFileIndexTest, TestEqualityDeletesGroup) { } TEST_P(DeleteFileIndexTest, TestMixDeleteFilesAndDVs) { - int version = GetParam(); + auto version = GetParam(); if (version < 3) { GTEST_SKIP() << "DVs only supported in V3+"; } @@ -897,7 +897,7 @@ TEST_P(DeleteFileIndexTest, TestMixDeleteFilesAndDVs) { } TEST_P(DeleteFileIndexTest, TestMultipleDVs) { - int version = GetParam(); + auto version = GetParam(); if (version < 3) { GTEST_SKIP() << "DVs only supported in V3+"; } @@ -923,7 +923,7 @@ TEST_P(DeleteFileIndexTest, TestMultipleDVs) { } TEST_P(DeleteFileIndexTest, TestInvalidDVSequenceNumber) { - int version = GetParam(); + auto version = GetParam(); if (version < 3) { GTEST_SKIP() << "DVs only supported in V3+"; } @@ -949,7 +949,7 @@ TEST_P(DeleteFileIndexTest, TestInvalidDVSequenceNumber) { } TEST_P(DeleteFileIndexTest, TestReferencedDeleteFiles) { - int version = GetParam(); + auto version = GetParam(); auto partition_a = PartitionValues({Literal::Int(0)}); auto eq_delete = MakeEqualityDeleteFile("/path/to/eq-delete.parquet", partition_a, @@ -987,7 +987,7 @@ TEST_P(DeleteFileIndexTest, TestReferencedDeleteFiles) { } TEST_P(DeleteFileIndexTest, TestExistingDeleteFiles) { - int version = GetParam(); + auto version = GetParam(); auto partition_a = PartitionValues({Literal::Int(0)}); auto eq_delete = MakeEqualityDeleteFile("/path/to/eq-delete.parquet", partition_a, @@ -1020,7 +1020,7 @@ TEST_P(DeleteFileIndexTest, TestExistingDeleteFiles) { } TEST_P(DeleteFileIndexTest, TestDeletedStatusExcluded) { - int version = GetParam(); + auto version = GetParam(); auto partition_a = PartitionValues({Literal::Int(0)}); auto eq_delete_added = MakeEqualityDeleteFile( @@ -1059,7 +1059,7 @@ TEST_P(DeleteFileIndexTest, TestDeletedStatusExcluded) { } TEST_P(DeleteFileIndexTest, TestPositionDeleteDiscardMetrics) { - int version = GetParam(); + auto version = GetParam(); auto partition_a = PartitionValues({Literal::Int(0)}); @@ -1119,7 +1119,7 @@ TEST_P(DeleteFileIndexTest, TestPositionDeleteDiscardMetrics) { } TEST_P(DeleteFileIndexTest, TestEqualityDeleteDiscardMetrics) { - int version = GetParam(); + auto version = GetParam(); auto partition_a = PartitionValues({Literal::Int(0)}); diff --git a/src/iceberg/test/manifest_group_test.cc b/src/iceberg/test/manifest_group_test.cc index 2a062c9ee..34ff9993b 100644 --- a/src/iceberg/test/manifest_group_test.cc +++ b/src/iceberg/test/manifest_group_test.cc @@ -45,7 +45,7 @@ namespace iceberg { -class ManifestGroupTest : public testing::TestWithParam { +class ManifestGroupTest : public testing::TestWithParam { protected: void SetUp() override { avro::RegisterAll(); @@ -130,7 +130,7 @@ class ManifestGroupTest : public testing::TestWithParam { }; } - ManifestFile WriteDataManifest(int format_version, int64_t snapshot_id, + ManifestFile WriteDataManifest(int8_t format_version, int64_t snapshot_id, std::vector entries, std::shared_ptr spec) { const std::string manifest_path = MakeManifestPath(); @@ -152,7 +152,7 @@ class ManifestGroupTest : public testing::TestWithParam { return std::move(manifest_result.value()); } - ManifestFile WriteDeleteManifest(int format_version, int64_t snapshot_id, + ManifestFile WriteDeleteManifest(int8_t format_version, int64_t snapshot_id, std::vector entries, std::shared_ptr spec) { const std::string manifest_path = MakeManifestPath(); @@ -187,7 +187,7 @@ class ManifestGroupTest : public testing::TestWithParam { // Write a ManifestFile to a manifest list and read it back. This is useful for v1 // to populate all missing fields like sequence_number. - ManifestFile WriteAndReadManifestListEntry(int format_version, int64_t snapshot_id, + ManifestFile WriteAndReadManifestListEntry(int8_t format_version, int64_t snapshot_id, int64_t sequence_number, const ManifestFile& manifest) { const std::string manifest_list_path = MakeManifestListPath(); @@ -236,7 +236,7 @@ class ManifestGroupTest : public testing::TestWithParam { }; TEST_P(ManifestGroupTest, CreateAndGetEntries) { - int version = GetParam(); + auto version = GetParam(); if (version < 2) { GTEST_SKIP() << "Delete files only supported in V2+"; } @@ -291,7 +291,7 @@ TEST_P(ManifestGroupTest, CreateAndGetEntries) { } TEST_P(ManifestGroupTest, IgnoreDeleted) { - int version = GetParam(); + auto version = GetParam(); constexpr int64_t kSnapshotId = 1000L; constexpr int64_t kSequenceNumber = 1L; @@ -329,7 +329,7 @@ TEST_P(ManifestGroupTest, IgnoreDeleted) { } TEST_P(ManifestGroupTest, IgnoreExisting) { - int version = GetParam(); + auto version = GetParam(); constexpr int64_t kSnapshotId = 1000L; constexpr int64_t kSequenceNumber = 1L; @@ -367,7 +367,7 @@ TEST_P(ManifestGroupTest, IgnoreExisting) { } TEST_P(ManifestGroupTest, CustomManifestEntriesFilter) { - int version = GetParam(); + auto version = GetParam(); constexpr int64_t kSnapshotId = 1000L; const auto part_value = PartitionValues({Literal::Int(0)}); @@ -418,7 +418,7 @@ TEST_P(ManifestGroupTest, EmptyManifestGroup) { } TEST_P(ManifestGroupTest, MultipleDataManifests) { - int version = GetParam(); + auto version = GetParam(); const auto partition_a = PartitionValues({Literal::Int(0)}); const auto partition_b = PartitionValues({Literal::Int(1)}); @@ -451,7 +451,7 @@ TEST_P(ManifestGroupTest, MultipleDataManifests) { } TEST_P(ManifestGroupTest, PartitionFilter) { - int version = GetParam(); + auto version = GetParam(); // Create two files with different partition values (bucket 0 and bucket 1) const auto partition_bucket_0 = PartitionValues({Literal::Int(0)}); diff --git a/src/iceberg/test/manifest_list_versions_test.cc b/src/iceberg/test/manifest_list_versions_test.cc index f7049fad6..9c16a02ec 100644 --- a/src/iceberg/test/manifest_list_versions_test.cc +++ b/src/iceberg/test/manifest_list_versions_test.cc @@ -103,7 +103,7 @@ class TestManifestListVersions : public ::testing::Test { std::chrono::system_clock::now().time_since_epoch().count()); } - std::string WriteManifestList(int format_version, int64_t expected_next_row_id, + std::string WriteManifestList(int8_t format_version, int64_t expected_next_row_id, const std::vector& manifests) const { const std::string manifest_list_path = CreateManifestListPath(); constexpr int64_t kParentSnapshotId = kSnapshotId - 1; @@ -126,7 +126,7 @@ class TestManifestListVersions : public ::testing::Test { return manifest_list_path; } - ManifestFile WriteAndReadManifestList(int format_version) const { + ManifestFile WriteAndReadManifestList(int8_t format_version) const { return ReadManifestList( WriteManifestList(format_version, kSnapshotFirstRowId, {kTestManifest})); } @@ -190,9 +190,9 @@ class TestManifestListVersions : public ::testing::Test { TEST_F(TestManifestListVersions, TestV1WriteDeleteManifest) { const std::string manifest_list_path = CreateManifestListPath(); - ICEBERG_UNWRAP_OR_FAIL(auto writer, - ManifestListWriter::MakeV1Writer(kSnapshotId, kSnapshotId - 1, - manifest_list_path, file_io_)); + ICEBERG_UNWRAP_OR_FAIL(auto writer, ManifestListWriter::MakeWriter( + /*format_version=*/1, kSnapshotId, + kSnapshotId - 1, manifest_list_path, file_io_)); auto status = writer->Add(kDeleteManifest); EXPECT_THAT(status, IsError(ErrorKind::kInvalidManifestList)); @@ -224,7 +224,7 @@ TEST_F(TestManifestListVersions, TestV1Write) { } TEST_F(TestManifestListVersions, TestV2Write) { - auto manifest = WriteAndReadManifestList(2); + auto manifest = WriteAndReadManifestList(/*format_version=*/2); // V3 fields are not written and are defaulted EXPECT_FALSE(manifest.first_row_id.has_value()); @@ -299,7 +299,8 @@ TEST_F(TestManifestListVersions, TestV3WriteMixedRowIdAssignment) { kSnapshotFirstRowId + 2 * (kAddedRows + kExistingRows); auto manifest_list_path = WriteManifestList( - 3, kExpectedNextRowId, {missing_first_row_id, kTestManifest, missing_first_row_id}); + /*format_version=*/3, kExpectedNextRowId, + {missing_first_row_id, kTestManifest, missing_first_row_id}); auto manifests = ReadAllManifests(manifest_list_path); EXPECT_EQ(manifests.size(), 3); @@ -444,7 +445,7 @@ TEST_F(TestManifestListVersions, TestManifestsPartitionSummary) { }; // Test for all format versions - for (int format_version = 1; format_version <= 3; ++format_version) { + for (int8_t format_version = 1; format_version <= 3; ++format_version) { int64_t expected_next_row_id = kSnapshotFirstRowId + manifest.added_rows_count.value() + manifest.existing_rows_count.value(); diff --git a/src/iceberg/test/manifest_reader_stats_test.cc b/src/iceberg/test/manifest_reader_stats_test.cc index d1da17953..a94dca120 100644 --- a/src/iceberg/test/manifest_reader_stats_test.cc +++ b/src/iceberg/test/manifest_reader_stats_test.cc @@ -40,7 +40,7 @@ namespace iceberg { -class TestManifestReaderStats : public testing::TestWithParam { +class TestManifestReaderStats : public testing::TestWithParam { protected: void SetUp() override { avro::RegisterAll(); @@ -86,7 +86,7 @@ class TestManifestReaderStats : public testing::TestWithParam { }); } - ManifestFile WriteManifest(int format_version, std::unique_ptr data_file) { + ManifestFile WriteManifest(int8_t format_version, std::unique_ptr data_file) { const std::string manifest_path = MakeManifestPath(); auto writer_result = ManifestWriter::MakeWriter( @@ -127,7 +127,7 @@ class TestManifestReaderStats : public testing::TestWithParam { }; TEST_P(TestManifestReaderStats, TestReadIncludesFullStats) { - int version = GetParam(); + auto version = GetParam(); auto manifest = WriteManifest(version, MakeDataFileWithStats()); auto reader_result = ManifestReader::Make(manifest, file_io_, schema_, spec_); @@ -142,7 +142,7 @@ TEST_P(TestManifestReaderStats, TestReadIncludesFullStats) { } TEST_P(TestManifestReaderStats, TestReadEntriesWithFilterIncludesFullStats) { - int version = GetParam(); + auto version = GetParam(); auto manifest = WriteManifest(version, MakeDataFileWithStats()); auto reader_result = ManifestReader::Make(manifest, file_io_, schema_, spec_); @@ -159,7 +159,7 @@ TEST_P(TestManifestReaderStats, TestReadEntriesWithFilterIncludesFullStats) { } TEST_P(TestManifestReaderStats, TestReadEntriesWithFilterAndSelectIncludesFullStats) { - int version = GetParam(); + auto version = GetParam(); auto manifest = WriteManifest(version, MakeDataFileWithStats()); auto reader_result = ManifestReader::Make(manifest, file_io_, schema_, spec_); @@ -177,7 +177,7 @@ TEST_P(TestManifestReaderStats, TestReadEntriesWithFilterAndSelectIncludesFullSt } TEST_P(TestManifestReaderStats, TestReadEntriesWithSelectNotProjectStats) { - int version = GetParam(); + auto version = GetParam(); auto manifest = WriteManifest(version, MakeDataFileWithStats()); auto reader_result = ManifestReader::Make(manifest, file_io_, schema_, spec_); @@ -194,7 +194,7 @@ TEST_P(TestManifestReaderStats, TestReadEntriesWithSelectNotProjectStats) { } TEST_P(TestManifestReaderStats, TestReadEntriesWithSelectCertainStatNotProjectStats) { - int version = GetParam(); + auto version = GetParam(); auto manifest = WriteManifest(version, MakeDataFileWithStats()); auto reader_result = ManifestReader::Make(manifest, file_io_, schema_, spec_); diff --git a/src/iceberg/test/manifest_reader_test.cc b/src/iceberg/test/manifest_reader_test.cc index 7604eba9f..3e93f6ff5 100644 --- a/src/iceberg/test/manifest_reader_test.cc +++ b/src/iceberg/test/manifest_reader_test.cc @@ -42,7 +42,7 @@ namespace iceberg { -class TestManifestReader : public testing::TestWithParam { +class TestManifestReader : public testing::TestWithParam { protected: void SetUp() override { avro::RegisterAll(); @@ -78,7 +78,7 @@ class TestManifestReader : public testing::TestWithParam { }); } - ManifestFile WriteManifest(int format_version, std::optional snapshot_id, + ManifestFile WriteManifest(int8_t format_version, std::optional snapshot_id, const std::vector& entries) { const std::string manifest_path = MakeManifestPath(); @@ -134,7 +134,7 @@ class TestManifestReader : public testing::TestWithParam { }); } - ManifestFile WriteDeleteManifest(int format_version, int64_t snapshot_id, + ManifestFile WriteDeleteManifest(int8_t format_version, int64_t snapshot_id, std::vector entries) { const std::string manifest_path = MakeManifestPath(); @@ -162,7 +162,7 @@ class TestManifestReader : public testing::TestWithParam { }; TEST_P(TestManifestReader, TestManifestReaderWithEmptyInheritableMetadata) { - int version = GetParam(); + auto version = GetParam(); auto file_a = MakeDataFile("/path/to/data-a.parquet", PartitionValues({Literal::Int(0)})); @@ -191,7 +191,7 @@ TEST_P(TestManifestReader, TestManifestReaderWithEmptyInheritableMetadata) { } TEST_P(TestManifestReader, TestReaderWithFilterWithoutSelect) { - int version = GetParam(); + auto version = GetParam(); auto file_a = MakeDataFile("/path/to/data-a.parquet", PartitionValues({Literal::Int(0)})); auto file_b = @@ -225,7 +225,7 @@ TEST_P(TestManifestReader, TestReaderWithFilterWithoutSelect) { } TEST_P(TestManifestReader, TestManifestReaderWithPartitionMetadata) { - int version = GetParam(); + auto version = GetParam(); auto file_a = MakeDataFile("/path/to/data-a.parquet", PartitionValues({Literal::Int(0)})); auto entry = @@ -254,7 +254,7 @@ TEST_P(TestManifestReader, TestManifestReaderWithPartitionMetadata) { } TEST_P(TestManifestReader, TestDeleteFilesWithReferences) { - int version = GetParam(); + auto version = GetParam(); if (version < 2) { GTEST_SKIP() << "Delete files only supported in V2+"; } @@ -293,7 +293,7 @@ TEST_P(TestManifestReader, TestDeleteFilesWithReferences) { } TEST_P(TestManifestReader, TestDVs) { - int version = GetParam(); + auto version = GetParam(); if (version < 3) { GTEST_SKIP() << "DVs only supported in V3+"; } @@ -337,7 +337,7 @@ TEST_P(TestManifestReader, TestDVs) { } TEST_P(TestManifestReader, TestInvalidUsage) { - int version = GetParam(); + auto version = GetParam(); auto file_a = MakeDataFile("/path/to/data-a.parquet", PartitionValues({Literal::Int(0)})); auto entry = @@ -354,7 +354,7 @@ TEST_P(TestManifestReader, TestInvalidUsage) { } TEST_P(TestManifestReader, TestDropStats) { - int version = GetParam(); + auto version = GetParam(); // Create a data file with full metrics auto file_with_stats = std::make_unique(DataFile{ diff --git a/src/iceberg/test/manifest_writer_versions_test.cc b/src/iceberg/test/manifest_writer_versions_test.cc index cc4e804be..fc61980a7 100644 --- a/src/iceberg/test/manifest_writer_versions_test.cc +++ b/src/iceberg/test/manifest_writer_versions_test.cc @@ -157,7 +157,7 @@ class ManifestWriterVersionsTest : public ::testing::Test { std::chrono::system_clock::now().time_since_epoch().count()); } - std::string WriteManifests(int format_version, + std::string WriteManifests(int8_t format_version, const std::vector& manifests) const { const std::string manifest_list_path = CreateManifestListPath(); constexpr int64_t kParentSnapshotId = kSnapshotId - 1; @@ -175,7 +175,7 @@ class ManifestWriterVersionsTest : public ::testing::Test { } std::vector WriteAndReadManifests( - const std::vector& manifests, int format_version) const { + const std::vector& manifests, int8_t format_version) const { return ReadManifests(WriteManifests(format_version, manifests)); } @@ -195,7 +195,7 @@ class ManifestWriterVersionsTest : public ::testing::Test { std::chrono::system_clock::now().time_since_epoch().count()); } - ManifestFile WriteManifest(int format_version, + ManifestFile WriteManifest(int8_t format_version, std::vector> data_files) { const std::string manifest_path = CreateManifestPath(); @@ -228,7 +228,7 @@ class ManifestWriterVersionsTest : public ::testing::Test { return entries_result.value(); } - ManifestFile WriteDeleteManifest(int format_version, + ManifestFile WriteDeleteManifest(int8_t format_version, std::shared_ptr delete_file) { const std::string manifest_path = CreateManifestPath(); @@ -249,7 +249,7 @@ class ManifestWriterVersionsTest : public ::testing::Test { return std::move(manifest_result.value()); } - ManifestFile RewriteManifest(const ManifestFile& old_manifest, int format_version) { + ManifestFile RewriteManifest(const ManifestFile& old_manifest, int8_t format_version) { auto entries = ReadManifest(old_manifest); const std::string manifest_path = CreateManifestPath(); @@ -422,8 +422,8 @@ TEST_F(ManifestWriterVersionsTest, TestV1Write) { TEST_F(ManifestWriterVersionsTest, TestV1WriteDelete) { const std::string manifest_path = CreateManifestPath(); ICEBERG_UNWRAP_OR_FAIL( - auto writer, - ManifestWriter::MakeV1Writer(kSnapshotId, manifest_path, file_io_, spec_, schema_)); + auto writer, ManifestWriter::MakeWriter(/*format_version=*/1, kSnapshotId, + manifest_path, file_io_, spec_, schema_)); ManifestEntry entry; entry.snapshot_id = kSnapshotId; @@ -485,7 +485,7 @@ TEST_F(ManifestWriterVersionsTest, TestV2ManifestListRewriteWithInheritance) { TableMetadata::kInitialSequenceNumber); // rewrite existing metadata with v2 manifest list - auto manifests2 = WriteAndReadManifests(manifests, 2); + auto manifests2 = WriteAndReadManifests(manifests, /*format_version=*/2); // the ManifestFile did not change and should still have its original sequence number, 0 CheckManifest(manifests2[0], TableMetadata::kInitialSequenceNumber, TableMetadata::kInitialSequenceNumber); @@ -504,12 +504,12 @@ TEST_F(ManifestWriterVersionsTest, TestV2ManifestRewriteWithInheritance) { TableMetadata::kInitialSequenceNumber); // rewrite the manifest file using a v2 manifest - auto rewritten_manifest = RewriteManifest(manifests[0], 2); + auto rewritten_manifest = RewriteManifest(manifests[0], /*format_version=*/2); CheckRewrittenManifest(rewritten_manifest, kInvalidSequenceNumber, TableMetadata::kInitialSequenceNumber); // add the v2 manifest to a v2 manifest list, with a sequence number - auto manifests2 = WriteAndReadManifests({rewritten_manifest}, 2); + auto manifests2 = WriteAndReadManifests({rewritten_manifest}, /*format_version=*/2); // the ManifestFile is new so it has a sequence number, but the min sequence number 0 is // from the entry CheckRewrittenManifest(manifests2[0], kSequenceNumber, @@ -573,7 +573,7 @@ TEST_F(ManifestWriterVersionsTest, TestV3ManifestListRewriteWithInheritance) { TableMetadata::kInitialSequenceNumber); // rewrite existing metadata with a manifest list - auto manifests3 = WriteAndReadManifests(manifests, 3); + auto manifests3 = WriteAndReadManifests(manifests, /*format_version=*/3); // the ManifestFile did not change and should still have its original sequence number, 0 CheckManifest(manifests3[0], TableMetadata::kInitialSequenceNumber, TableMetadata::kInitialSequenceNumber); @@ -593,12 +593,12 @@ TEST_F(ManifestWriterVersionsTest, TestV3ManifestRewriteWithInheritance) { TableMetadata::kInitialSequenceNumber); // rewrite the manifest file using a v3 manifest - auto rewritten_manifest = RewriteManifest(manifests[0], 3); + auto rewritten_manifest = RewriteManifest(manifests[0], /*format_version=*/3); CheckRewrittenManifest(rewritten_manifest, kInvalidSequenceNumber, TableMetadata::kInitialSequenceNumber); // add the v3 manifest to a v3 manifest list, with a sequence number - auto manifests3 = WriteAndReadManifests({rewritten_manifest}, 3); + auto manifests3 = WriteAndReadManifests({rewritten_manifest}, /*format_version=*/3); // the ManifestFile is new so it has a sequence number, but the min sequence number 0 is // from the entry CheckRewrittenManifest(manifests3[0], kSequenceNumber, diff --git a/src/iceberg/test/rolling_manifest_writer_test.cc b/src/iceberg/test/rolling_manifest_writer_test.cc index 439359bc8..b996eb166 100644 --- a/src/iceberg/test/rolling_manifest_writer_test.cc +++ b/src/iceberg/test/rolling_manifest_writer_test.cc @@ -86,7 +86,7 @@ static std::string CreateManifestPath() { } // namespace -class RollingManifestWriterTest : public ::testing::TestWithParam { +class RollingManifestWriterTest : public ::testing::TestWithParam { protected: void SetUp() override { avro::RegisterAll(); @@ -106,7 +106,7 @@ class RollingManifestWriterTest : public ::testing::TestWithParam { } RollingManifestWriter::ManifestWriterFactory NewRollingWriteManifestFactory( - int32_t format_version) { + int8_t format_version) { return [this, format_version]() -> Result> { const std::string manifest_path = CreateManifestPath(); return ManifestWriter::MakeWriter(format_version, kSnapshotId, manifest_path, @@ -116,7 +116,7 @@ class RollingManifestWriterTest : public ::testing::TestWithParam { } RollingManifestWriter::ManifestWriterFactory NewRollingWriteDeleteManifestFactory( - int32_t format_version) { + int8_t format_version) { return [this, format_version]() -> Result> { const std::string manifest_path = CreateManifestPath(); return ManifestWriter::MakeWriter(format_version, kSnapshotId, manifest_path, @@ -155,7 +155,7 @@ class RollingManifestWriterTest : public ::testing::TestWithParam { }; TEST_P(RollingManifestWriterTest, TestRollingManifestWriterNoRecords) { - int32_t format_version = GetParam(); + auto format_version = GetParam(); RollingManifestWriter writer(NewRollingWriteManifestFactory(format_version), kSmallFileSize); @@ -170,7 +170,7 @@ TEST_P(RollingManifestWriterTest, TestRollingManifestWriterNoRecords) { } TEST_P(RollingManifestWriterTest, TestRollingDeleteManifestWriterNoRecords) { - int32_t format_version = GetParam(); + auto format_version = GetParam(); if (format_version < 2) { GTEST_SKIP() << "Delete manifests only supported in V2+"; } @@ -188,7 +188,7 @@ TEST_P(RollingManifestWriterTest, TestRollingDeleteManifestWriterNoRecords) { } TEST_P(RollingManifestWriterTest, TestRollingManifestWriterSplitFiles) { - int32_t format_version = GetParam(); + auto format_version = GetParam(); RollingManifestWriter writer(NewRollingWriteManifestFactory(format_version), kSmallFileSize); @@ -239,7 +239,7 @@ TEST_P(RollingManifestWriterTest, TestRollingManifestWriterSplitFiles) { } TEST_P(RollingManifestWriterTest, TestRollingDeleteManifestWriterSplitFiles) { - int32_t format_version = GetParam(); + auto format_version = GetParam(); if (format_version < 2) { GTEST_SKIP() << "Delete manifests only supported in V2+"; } diff --git a/src/iceberg/test/table_scan_test.cc b/src/iceberg/test/table_scan_test.cc index b496606cf..2eed86732 100644 --- a/src/iceberg/test/table_scan_test.cc +++ b/src/iceberg/test/table_scan_test.cc @@ -47,7 +47,7 @@ namespace iceberg { -class TableScanTest : public testing::TestWithParam { +class TableScanTest : public testing::TestWithParam { protected: void SetUp() override { avro::RegisterAll(); @@ -175,7 +175,7 @@ class TableScanTest : public testing::TestWithParam { }; } - ManifestFile WriteDataManifest(int format_version, int64_t snapshot_id, + ManifestFile WriteDataManifest(int8_t format_version, int64_t snapshot_id, std::vector entries, std::shared_ptr spec) { const std::string manifest_path = MakeManifestPath(); @@ -197,7 +197,7 @@ class TableScanTest : public testing::TestWithParam { return std::move(manifest_result.value()); } - ManifestFile WriteDeleteManifest(int format_version, int64_t snapshot_id, + ManifestFile WriteDeleteManifest(int8_t format_version, int64_t snapshot_id, std::vector entries, std::shared_ptr spec) { const std::string manifest_path = MakeManifestPath(); @@ -224,7 +224,7 @@ class TableScanTest : public testing::TestWithParam { std::chrono::system_clock::now().time_since_epoch().count()); } - std::string WriteManifestList(int format_version, int64_t snapshot_id, + std::string WriteManifestList(int8_t format_version, int64_t snapshot_id, int64_t sequence_number, const std::vector& manifests) { const std::string manifest_list_path = MakeManifestListPath(); @@ -367,7 +367,7 @@ TEST_P(TableScanTest, DataTableScanPlanFilesEmpty) { } TEST_P(TableScanTest, PlanFilesWithDataManifests) { - int version = GetParam(); + auto version = GetParam(); constexpr int64_t kSnapshotId = 1000L; const auto part_value = PartitionValues({Literal::Int(0)}); @@ -427,7 +427,7 @@ TEST_P(TableScanTest, PlanFilesWithDataManifests) { } TEST_P(TableScanTest, PlanFilesWithMultipleManifests) { - int version = GetParam(); + auto version = GetParam(); const auto partition_a = PartitionValues({Literal::Int(0)}); const auto partition_b = PartitionValues({Literal::Int(1)}); @@ -494,7 +494,7 @@ TEST_P(TableScanTest, PlanFilesWithMultipleManifests) { } TEST_P(TableScanTest, PlanFilesWithFilter) { - int version = GetParam(); + auto version = GetParam(); constexpr int64_t kSnapshotId = 1000L; const auto part_value = PartitionValues({Literal::Int(0)}); @@ -587,7 +587,7 @@ TEST_P(TableScanTest, PlanFilesWithFilter) { } TEST_P(TableScanTest, PlanFilesWithDeleteFiles) { - int version = GetParam(); + auto version = GetParam(); if (version < 2) { GTEST_SKIP() << "Delete files only supported in V2+"; } diff --git a/src/iceberg/update/update_partition_spec.cc b/src/iceberg/update/update_partition_spec.cc index 54c3dc60a..812540f2b 100644 --- a/src/iceberg/update/update_partition_spec.cc +++ b/src/iceberg/update/update_partition_spec.cc @@ -32,7 +32,6 @@ #include "iceberg/table_metadata.h" #include "iceberg/transaction.h" #include "iceberg/transform.h" -#include "iceberg/util/checked_cast.h" #include "iceberg/util/macros.h" namespace iceberg {