diff --git a/include/paimon/catalog/catalog.h b/include/paimon/catalog/catalog.h index 2c5bc6d21..0ff9349bd 100644 --- a/include/paimon/catalog/catalog.h +++ b/include/paimon/catalog/catalog.h @@ -169,8 +169,9 @@ class PAIMON_EXPORT Catalog { /// @note This does not check whether the table actually exists. /// /// @param identifier The table identifier containing database and table name. - /// @return A string representing the expected location of the table. - virtual std::string GetTableLocation(const Identifier& identifier) const = 0; + /// @return A result containing the expected location of the table, or an error status on + /// failure. + virtual Result GetTableLocation(const Identifier& identifier) const = 0; /// Returns the root path of the catalog. /// diff --git a/include/paimon/defs.h b/include/paimon/defs.h index 70f2aa0b0..abe262986 100644 --- a/include/paimon/defs.h +++ b/include/paimon/defs.h @@ -391,6 +391,10 @@ struct PAIMON_EXPORT Options { /// configured by 'blob-external-storage-field' is written at write time. Orphan file cleanup is /// not applied to this path. No default value. static const char BLOB_EXTERNAL_STORAGE_PATH[]; + /// "blob-view-upstream-warehouse" - Since the catalog capabilities are partially missing, when + /// Blob View is enabled, cpp paimon cannot automatically obtain the upstream table warehouse + /// path and requires manual configuration by the user. No default value. + static const char BLOB_VIEW_UPSTREAM_WAREHOUSE[]; /// "global-index.enabled" - Whether to enable global index for scan. Default value is "true". static const char GLOBAL_INDEX_ENABLED[]; /// "global-index.thread-num" - The maximum number of concurrent scanner for global index. No diff --git a/include/paimon/executor.h b/include/paimon/executor.h index 0ba96e31d..40ba447b6 100644 --- a/include/paimon/executor.h +++ b/include/paimon/executor.h @@ -54,6 +54,9 @@ class PAIMON_EXPORT Executor { /// Shutdown the executor immediately, discarding all pending tasks. virtual void ShutdownNow() = 0; + + /// Get thread number. + virtual uint32_t GetThreadNum() const = 0; }; } // namespace paimon diff --git a/src/paimon/common/data/blob_utils.cpp b/src/paimon/common/data/blob_utils.cpp index 75ff4302a..39a780561 100644 --- a/src/paimon/common/data/blob_utils.cpp +++ b/src/paimon/common/data/blob_utils.cpp @@ -26,6 +26,7 @@ #include "fmt/format.h" #include "paimon/common/data/blob_defs.h" #include "paimon/common/data/blob_descriptor.h" +#include "paimon/common/data/blob_view_struct.h" #include "paimon/common/types/data_field.h" #include "paimon/common/utils/arrow/status_utils.h" #include "paimon/common/utils/string_utils.h" @@ -155,10 +156,47 @@ Status BlobUtils::ValidateInlineBlobDescriptors( PAIMON_ASSIGN_OR_RAISE(bool is_descriptor, BlobDescriptor::IsBlobDescriptor(value.data(), value.size())); if (!is_descriptor) { - return Status::Invalid(fmt::format( - "BLOB inline field {} configured by blob-descriptor-field or blob-view-field " - "require values to be a BlobDescriptor or BlobViewStruct.", - field_name)); + return Status::Invalid( + fmt::format("BLOB inline field {} configured by blob-descriptor-field require " + "values to be a BlobDescriptor.", + field_name)); + } + } + } + return Status::OK(); +} + +Status BlobUtils::ValidateBlobViewFields(const std::shared_ptr& struct_array, + const std::set& view_fields) { + if (view_fields.empty()) { + return Status::OK(); + } + if (!struct_array) { + return Status::Invalid("array in ValidateBlobViewFields must be a struct_array"); + } + for (const auto& field_name : view_fields) { + auto field_array = struct_array->GetFieldByName(field_name); + if (!field_array) { + continue; + } + const auto* binary_array = + arrow::internal::checked_cast(field_array.get()); + if (!binary_array) { + return Status::Invalid( + fmt::format("cannot cast array for field {} to LargeBinaryArray", field_name)); + } + for (int64_t row = 0; row < binary_array->length(); ++row) { + if (binary_array->IsNull(row)) { + continue; + } + auto value = binary_array->GetView(row); + PAIMON_ASSIGN_OR_RAISE(bool is_view, + BlobViewStruct::IsBlobViewStruct(value.data(), value.size())); + if (!is_view) { + return Status::Invalid( + fmt::format("BLOB inline field {} configured by blob-view-field require values " + "to be a BlobViewStruct.", + field_name)); } } } diff --git a/src/paimon/common/data/blob_utils.h b/src/paimon/common/data/blob_utils.h index 211f15f84..23b230165 100644 --- a/src/paimon/common/data/blob_utils.h +++ b/src/paimon/common/data/blob_utils.h @@ -80,6 +80,9 @@ class PAIMON_EXPORT BlobUtils { const std::shared_ptr& struct_array, const std::set& inline_descriptor_fields); + static Status ValidateBlobViewFields(const std::shared_ptr& struct_array, + const std::set& view_fields); + /// Converts inline blob DataFields from large_binary to binary type. /// Inline blob fields use large_binary in the table schema (because they are BLOB type), /// but are stored as binary in data files. This conversion aligns the field type with diff --git a/src/paimon/common/data/blob_utils_test.cpp b/src/paimon/common/data/blob_utils_test.cpp index f2a02f10d..b043dcf4c 100644 --- a/src/paimon/common/data/blob_utils_test.cpp +++ b/src/paimon/common/data/blob_utils_test.cpp @@ -19,8 +19,10 @@ #include "arrow/api.h" #include "arrow/c/bridge.h" #include "gtest/gtest.h" +#include "paimon/catalog/identifier.h" #include "paimon/common/data/blob_defs.h" #include "paimon/common/data/blob_descriptor.h" +#include "paimon/common/data/blob_view_struct.h" #include "paimon/common/types/data_field.h" #include "paimon/data/blob.h" #include "paimon/memory/memory_pool.h" @@ -29,44 +31,47 @@ namespace paimon::test { class BlobUtilsTest : public ::testing::Test { - private: + public: std::shared_ptr CreateBlobMetadata() { std::unordered_map blob_metadata_map = { {BlobDefs::kExtensionTypeKey, BlobDefs::kExtensionTypeValue}}; return std::make_shared(blob_metadata_map); } + + private: + std::shared_ptr pool_ = GetDefaultPool(); }; TEST_F(BlobUtilsTest, IsBlobMetadata) { auto correct_metadata = CreateBlobMetadata(); - EXPECT_TRUE(BlobUtils::IsBlobMetadata(correct_metadata)); - EXPECT_FALSE(BlobUtils::IsBlobMetadata(nullptr)); + ASSERT_TRUE(BlobUtils::IsBlobMetadata(correct_metadata)); + ASSERT_FALSE(BlobUtils::IsBlobMetadata(nullptr)); std::unordered_map wrong_metadata_map = { {BlobDefs::kExtensionTypeKey, "paimon.type.varchar"}}; auto wrong_metadata = std::make_shared(wrong_metadata_map); - EXPECT_FALSE(BlobUtils::IsBlobMetadata(wrong_metadata)); + ASSERT_FALSE(BlobUtils::IsBlobMetadata(wrong_metadata)); std::unordered_map no_extension_metadata_map = { {"other_key", BlobDefs::kExtensionTypeValue}}; auto no_extension_metadata = std::make_shared(no_extension_metadata_map); - EXPECT_FALSE(BlobUtils::IsBlobMetadata(no_extension_metadata)); + ASSERT_FALSE(BlobUtils::IsBlobMetadata(no_extension_metadata)); } TEST_F(BlobUtilsTest, IsBlobField) { std::shared_ptr blob_field = BlobUtils::ToArrowField("f1", true); - EXPECT_TRUE(BlobUtils::IsBlobField(blob_field)); + ASSERT_TRUE(BlobUtils::IsBlobField(blob_field)); auto int_field = arrow::field("i_int", arrow::int32()); - EXPECT_FALSE(BlobUtils::IsBlobField(int_field)); + ASSERT_FALSE(BlobUtils::IsBlobField(int_field)); auto binary_field_no_meta = arrow::field("b_no_meta", arrow::large_binary()); - EXPECT_FALSE(BlobUtils::IsBlobField(binary_field_no_meta)); + ASSERT_FALSE(BlobUtils::IsBlobField(binary_field_no_meta)); auto wrong_meta = std::make_shared( std::unordered_map{{"other_key", "value"}}); auto binary_field_wrong_meta = arrow::field("b_wrong_meta", arrow::large_binary(), false, wrong_meta); - EXPECT_FALSE(BlobUtils::IsBlobField(binary_field_wrong_meta)); + ASSERT_FALSE(BlobUtils::IsBlobField(binary_field_wrong_meta)); } TEST_F(BlobUtilsTest, SeparateBlobSchema) { @@ -250,9 +255,8 @@ TEST_F(BlobUtilsTest, ValidateInlineBlobDescriptorsFieldNotPresent) { TEST_F(BlobUtilsTest, ValidateInlineBlobDescriptorsWithValidDescriptor) { // Valid BlobDescriptor bytes -> OK - auto pool = GetDefaultPool(); ASSERT_OK_AND_ASSIGN(auto descriptor, BlobDescriptor::Create("file:///tmp/test.bin", 0, 100)); - auto serialized = descriptor->Serialize(pool); + auto serialized = descriptor->Serialize(pool_); arrow::LargeBinaryBuilder builder; ASSERT_TRUE(builder.Append(serialized->data(), serialized->size()).ok()); @@ -282,18 +286,15 @@ TEST_F(BlobUtilsTest, ValidateInlineBlobDescriptorsWithRawBytes) { auto struct_array = arrow::StructArray::Make({blob_array}, {BlobUtils::ToArrowField("b0")}).ValueOrDie(); auto sa = std::dynamic_pointer_cast(struct_array); - ASSERT_NOK_WITH_MSG( - BlobUtils::ValidateInlineBlobDescriptors(sa, {"b0"}), - "BLOB inline field b0 configured by blob-descriptor-field or blob-view-field " - "require values to be a BlobDescriptor or BlobViewStruct."); + ASSERT_NOK_WITH_MSG(BlobUtils::ValidateInlineBlobDescriptors(sa, {"b0"}), + "BLOB inline field b0 configured by blob-descriptor-field require values " + "to be a BlobDescriptor."); } TEST_F(BlobUtilsTest, ValidateInlineBlobDescriptorsMixedValidAndInvalid) { // First row is valid descriptor, second row is raw bytes -> error on row 1 - auto pool = GetDefaultPool(); ASSERT_OK_AND_ASSIGN(auto descriptor, BlobDescriptor::Create("file:///tmp/test.bin", 0, 100)); - auto serialized = descriptor->Serialize(pool); - + auto serialized = descriptor->Serialize(pool_); arrow::LargeBinaryBuilder builder; ASSERT_TRUE(builder.Append(serialized->data(), serialized->size()).ok()); ASSERT_TRUE(builder.Append("raw_bytes_not_descriptor").ok()); @@ -301,17 +302,15 @@ TEST_F(BlobUtilsTest, ValidateInlineBlobDescriptorsMixedValidAndInvalid) { auto struct_array = arrow::StructArray::Make({blob_array}, {BlobUtils::ToArrowField("b0")}).ValueOrDie(); auto sa = std::dynamic_pointer_cast(struct_array); - ASSERT_NOK_WITH_MSG( - BlobUtils::ValidateInlineBlobDescriptors(sa, {"b0"}), - "BLOB inline field b0 configured by blob-descriptor-field or blob-view-field " - "require values to be a BlobDescriptor or BlobViewStruct."); + ASSERT_NOK_WITH_MSG(BlobUtils::ValidateInlineBlobDescriptors(sa, {"b0"}), + "BLOB inline field b0 configured by blob-descriptor-field require values " + "to be a BlobDescriptor."); } TEST_F(BlobUtilsTest, ValidateInlineBlobDescriptorsMultipleFields) { // Two inline fields: b0 is valid, b1 has raw bytes -> error on b1 - auto pool = GetDefaultPool(); ASSERT_OK_AND_ASSIGN(auto descriptor, BlobDescriptor::Create("file:///tmp/test.bin", 0, 100)); - auto serialized = descriptor->Serialize(pool); + auto serialized = descriptor->Serialize(pool_); arrow::LargeBinaryBuilder b0_builder; ASSERT_TRUE(b0_builder.Append(serialized->data(), serialized->size()).ok()); @@ -326,10 +325,86 @@ TEST_F(BlobUtilsTest, ValidateInlineBlobDescriptorsMultipleFields) { {BlobUtils::ToArrowField("b0"), BlobUtils::ToArrowField("b1")}) .ValueOrDie(); auto sa = std::dynamic_pointer_cast(struct_array); - ASSERT_NOK_WITH_MSG( - BlobUtils::ValidateInlineBlobDescriptors(sa, {"b0", "b1"}), - "BLOB inline field b1 configured by blob-descriptor-field or blob-view-field " - "require values to be a BlobDescriptor or BlobViewStruct."); + ASSERT_NOK_WITH_MSG(BlobUtils::ValidateInlineBlobDescriptors(sa, {"b0", "b1"}), + "BLOB inline field b1 configured by blob-descriptor-field require values " + "to be a BlobDescriptor."); +} + +TEST_F(BlobUtilsTest, ValidateBlobViewFieldsEmptyFields) { + // Empty view_fields -> always OK + arrow::LargeBinaryBuilder builder; + ASSERT_TRUE(builder.Append("random_data").ok()); + auto array = builder.Finish().ValueOrDie(); + auto struct_array = + arrow::StructArray::Make({array}, {BlobUtils::ToArrowField("view")}).ValueOrDie(); + auto sa = std::dynamic_pointer_cast(struct_array); + ASSERT_OK(BlobUtils::ValidateBlobViewFields(sa, {})); +} + +TEST_F(BlobUtilsTest, ValidateBlobViewFieldsFieldNotPresent) { + // Field not in struct_array -> skip, OK + arrow::Int32Builder int_builder; + ASSERT_TRUE(int_builder.Append(42).ok()); + auto int_array = int_builder.Finish().ValueOrDie(); + auto struct_array = + arrow::StructArray::Make({int_array}, {arrow::field("f0", arrow::int32())}).ValueOrDie(); + auto sa = std::dynamic_pointer_cast(struct_array); + ASSERT_OK(BlobUtils::ValidateBlobViewFields(sa, {"view"})); +} + +TEST_F(BlobUtilsTest, ValidateBlobViewFieldsWithValidViewStruct) { + // A BlobViewStruct value is accepted for a view field. + BlobViewStruct view_struct(Identifier("db", "tbl"), /*field_id=*/2, /*row_id=*/5); + auto serialized = view_struct.Serialize(pool_); + + arrow::LargeBinaryBuilder builder; + ASSERT_TRUE(builder.Append(serialized->data(), serialized->size()).ok()); + auto blob_array = builder.Finish().ValueOrDie(); + auto struct_array = + arrow::StructArray::Make({blob_array}, {BlobUtils::ToArrowField("view")}).ValueOrDie(); + auto sa = std::dynamic_pointer_cast(struct_array); + ASSERT_OK(BlobUtils::ValidateBlobViewFields(sa, {"view"})); +} + +TEST_F(BlobUtilsTest, ValidateBlobViewFieldsWithNullValue) { + // Null values in view column -> skip, OK + arrow::LargeBinaryBuilder builder; + ASSERT_TRUE(builder.AppendNull().ok()); + auto blob_array = builder.Finish().ValueOrDie(); + auto struct_array = + arrow::StructArray::Make({blob_array}, {BlobUtils::ToArrowField("view")}).ValueOrDie(); + auto sa = std::dynamic_pointer_cast(struct_array); + ASSERT_OK(BlobUtils::ValidateBlobViewFields(sa, {"view"})); +} + +TEST_F(BlobUtilsTest, ValidateBlobViewFieldsWithRawBytes) { + // Raw bytes -> error + arrow::LargeBinaryBuilder builder; + ASSERT_TRUE(builder.Append("raw_bytes_not_view").ok()); + auto blob_array = builder.Finish().ValueOrDie(); + auto struct_array = + arrow::StructArray::Make({blob_array}, {BlobUtils::ToArrowField("view")}).ValueOrDie(); + auto sa = std::dynamic_pointer_cast(struct_array); + ASSERT_NOK_WITH_MSG(BlobUtils::ValidateBlobViewFields(sa, {"view"}), + "BLOB inline field view configured by blob-view-field require values to be " + "a BlobViewStruct."); +} + +TEST_F(BlobUtilsTest, ValidateBlobViewFieldsRejectsBlobDescriptor) { + // A BlobDescriptor value is NOT accepted for a view field. + auto pool = GetDefaultPool(); + ASSERT_OK_AND_ASSIGN(auto descriptor, BlobDescriptor::Create("file:///tmp/test.bin", 0, 100)); + auto serialized = descriptor->Serialize(pool); + + arrow::LargeBinaryBuilder builder; + ASSERT_TRUE(builder.Append(serialized->data(), serialized->size()).ok()); + auto blob_array = builder.Finish().ValueOrDie(); + auto struct_array = + arrow::StructArray::Make({blob_array}, {BlobUtils::ToArrowField("view")}).ValueOrDie(); + auto sa = std::dynamic_pointer_cast(struct_array); + ASSERT_NOK_WITH_MSG(BlobUtils::ValidateBlobViewFields(sa, {"view"}), + "BLOB inline field view configured by blob-view-field require values " + "to be a BlobViewStruct."); } TEST_F(BlobUtilsTest, TestConvertBlobInlineDataFields) { diff --git a/src/paimon/common/data/blob_view_struct_test.cpp b/src/paimon/common/data/blob_view_struct_test.cpp index 58c68fa3d..9db75bc6b 100644 --- a/src/paimon/common/data/blob_view_struct_test.cpp +++ b/src/paimon/common/data/blob_view_struct_test.cpp @@ -106,4 +106,50 @@ TEST_F(BlobViewStructTest, TestEqual) { } } +TEST_F(BlobViewStructTest, TestIsBlobViewStructValid) { + auto serialized = view_struct_.Serialize(pool_); + ASSERT_OK_AND_ASSIGN(bool result, + BlobViewStruct::IsBlobViewStruct(serialized->data(), serialized->size())); + ASSERT_TRUE(result); +} + +TEST_F(BlobViewStructTest, TestIsBlobViewStructWithTooShortBuffer) { + // Buffer shorter than 9 bytes should return false + std::vector short_buffer = {0x02, 0x43, 0x53, 0x45, 0x44, 0x42, 0x4F, 0x4C}; + ASSERT_OK_AND_ASSIGN( + bool result, BlobViewStruct::IsBlobViewStruct(short_buffer.data(), short_buffer.size())); + ASSERT_FALSE(result); + + // Empty buffer + ASSERT_OK_AND_ASSIGN(bool empty_result, BlobViewStruct::IsBlobViewStruct(nullptr, 0)); + ASSERT_FALSE(empty_result); +} + +TEST_F(BlobViewStructTest, TestIsBlobViewStructWithFutureVersion) { + // Version > CURRENT_VERSION should return false (not an error) + auto serialized = view_struct_.Serialize(pool_); + (*serialized)[0] = '\x02'; // set version to 2 (> CURRENT_VERSION) + ASSERT_OK_AND_ASSIGN(bool result, + BlobViewStruct::IsBlobViewStruct(serialized->data(), serialized->size())); + ASSERT_FALSE(result); +} + +TEST_F(BlobViewStructTest, TestIsBlobViewStructWithWrongMagic) { + // Wrong magic number should return false + auto serialized = view_struct_.Serialize(pool_); + // Corrupt the magic bytes (bytes 1-8) + (*serialized)[1] = '\x00'; + (*serialized)[2] = '\x00'; + ASSERT_OK_AND_ASSIGN(bool result, + BlobViewStruct::IsBlobViewStruct(serialized->data(), serialized->size())); + ASSERT_FALSE(result); +} + +TEST_F(BlobViewStructTest, TestIsBlobViewStructWithRandomData) { + // Random data that doesn't match format + std::vector random_data = {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09}; + ASSERT_OK_AND_ASSIGN(bool result, + BlobViewStruct::IsBlobViewStruct(random_data.data(), random_data.size())); + ASSERT_FALSE(result); +} } // namespace paimon::test diff --git a/src/paimon/common/defs.cpp b/src/paimon/common/defs.cpp index b328213d6..393ad5b42 100644 --- a/src/paimon/common/defs.cpp +++ b/src/paimon/common/defs.cpp @@ -96,6 +96,7 @@ const char Options::BLOB_FIELD[] = "blob-field"; const char Options::BLOB_DESCRIPTOR_FIELD[] = "blob-descriptor-field"; const char Options::FALLBACK_BLOB_DESCRIPTOR_FIELD[] = "blob.stored-descriptor-fields"; const char Options::BLOB_VIEW_FIELD[] = "blob-view-field"; +const char Options::BLOB_VIEW_UPSTREAM_WAREHOUSE[] = "blob-view-upstream-warehouse"; const char Options::BLOB_EXTERNAL_STORAGE_FIELD[] = "blob-external-storage-field"; const char Options::BLOB_EXTERNAL_STORAGE_PATH[] = "blob-external-storage-path"; const char Options::GLOBAL_INDEX_ENABLED[] = "global-index.enabled"; diff --git a/src/paimon/common/executor/executor.cpp b/src/paimon/common/executor/executor.cpp index f52a98fad..8928c8b0d 100644 --- a/src/paimon/common/executor/executor.cpp +++ b/src/paimon/common/executor/executor.cpp @@ -32,14 +32,14 @@ class DefaultExecutor : public Executor { ~DefaultExecutor() override; void Add(std::function func) override; - void ShutdownNow() override; + uint32_t GetThreadNum() const override; private: void WorkerThread(); - void ShutdownInternal(bool wait_for_pending_tasks); + private: uint32_t thread_count_; std::vector workers_; std::queue> tasks_; @@ -55,6 +55,10 @@ DefaultExecutor::DefaultExecutor(uint32_t thread_count) : thread_count_(thread_c } } +uint32_t DefaultExecutor::GetThreadNum() const { + return thread_count_; +} + void DefaultExecutor::ShutdownInternal(bool wait_for_pending_tasks) { { std::unique_lock lock(queue_mutex_); diff --git a/src/paimon/core/append/append_only_writer.cpp b/src/paimon/core/append/append_only_writer.cpp index b23975d90..f67ba57ad 100644 --- a/src/paimon/core/append/append_only_writer.cpp +++ b/src/paimon/core/append/append_only_writer.cpp @@ -94,7 +94,6 @@ Status AppendOnlyWriter::Write(std::unique_ptr&& batch) { PAIMON_ASSIGN_OR_RAISE(std::shared_ptr transformed, external_storage_writer_->TransformBatch(struct_array)); auto transformed_struct = std::dynamic_pointer_cast(transformed); - // TODO(lc.lsz): validate blob view PAIMON_RETURN_NOT_OK(BlobUtils::ValidateInlineBlobDescriptors(transformed_struct, inline_descriptor_fields_)); ::ArrowArray c_transformed; @@ -102,14 +101,14 @@ Status AppendOnlyWriter::Write(std::unique_ptr&& batch) { return writer_->Write(&c_transformed); } - if (!inline_descriptor_fields_.empty()) { + if (!inline_descriptor_fields_.empty() || !inline_view_fields_.empty()) { auto data_type = arrow::struct_(write_schema_->fields()); PAIMON_ASSIGN_OR_RAISE_FROM_ARROW(std::shared_ptr arrow_array, arrow::ImportArray(batch->GetData(), data_type)); auto struct_array = std::dynamic_pointer_cast(arrow_array); - // TODO(lc.lsz): validate blob view PAIMON_RETURN_NOT_OK( BlobUtils::ValidateInlineBlobDescriptors(struct_array, inline_descriptor_fields_)); + PAIMON_RETURN_NOT_OK(BlobUtils::ValidateBlobViewFields(struct_array, inline_view_fields_)); ::ArrowArray c_array; PAIMON_RETURN_NOT_OK_FROM_ARROW(arrow::ExportArray(*struct_array, &c_array)); return writer_->Write(&c_array); @@ -189,9 +188,10 @@ AppendOnlyWriter::RollingFileWriterResult AppendOnlyWriter::CreateRollingRowWrit auto blob_context = BlobFileContext::Create(write_schema_, options_); std::optional> main_write_cols = write_cols_; - // Save inline descriptor fields for validation in Write() + // Save inline descriptor and view fields for validation in Write() if (blob_context) { inline_descriptor_fields_ = blob_context->GetDescriptorFields(); + inline_view_fields_ = blob_context->GetViewFields(); } // Initialize ExternalStorageBlobWriter if needed diff --git a/src/paimon/core/append/append_only_writer.h b/src/paimon/core/append/append_only_writer.h index d1b4339d8..25bfe05ef 100644 --- a/src/paimon/core/append/append_only_writer.h +++ b/src/paimon/core/append/append_only_writer.h @@ -135,6 +135,7 @@ class AppendOnlyWriter : public BatchWriter { std::unique_ptr>> writer_; std::unique_ptr external_storage_writer_; std::set inline_descriptor_fields_; + std::set inline_view_fields_; }; } // namespace paimon diff --git a/src/paimon/core/append/append_only_writer_test.cpp b/src/paimon/core/append/append_only_writer_test.cpp index 5d63f98f4..32992e12b 100644 --- a/src/paimon/core/append/append_only_writer_test.cpp +++ b/src/paimon/core/append/append_only_writer_test.cpp @@ -33,7 +33,9 @@ #include "arrow/c/helpers.h" #include "arrow/type.h" #include "gtest/gtest.h" +#include "paimon/common/data/blob_descriptor.h" #include "paimon/common/data/blob_utils.h" +#include "paimon/common/data/blob_view_struct.h" #include "paimon/common/fs/external_path_provider.h" #include "paimon/core/compact/compact_deletion_file.h" #include "paimon/core/compact/compact_result.h" @@ -705,4 +707,70 @@ TEST_F(AppendOnlyWriterTest, TestMultiplePrepareCommitSequenceContinuity) { ASSERT_OK(writer.Close()); } +TEST_F(AppendOnlyWriterTest, TestWriteValidBlobViewField) { + auto options = CreateOptions({{Options::FILE_FORMAT, "orc"}, + {Options::MANIFEST_FORMAT, "orc"}, + {Options::BLOB_VIEW_FIELD, "view"}}); + auto dir = UniqueTestDirectory::Create(); + ASSERT_TRUE(dir); + auto path_factory = CreatePathFactory(dir->Str(), "orc", options); + + auto schema = + arrow::schema({arrow::field("f0", arrow::int32()), BlobUtils::ToArrowField("view", true)}); + AppendOnlyWriter writer(options, /*schema_id=*/0, schema, /*write_cols=*/std::nullopt, + /*max_sequence_number=*/-1, path_factory, compact_manager_, + memory_pool_); + + // Build f0 column + arrow::Int32Builder int_builder; + ASSERT_TRUE(int_builder.AppendValues({1, 2}).ok()); + auto int_array = int_builder.Finish().ValueOrDie(); + + // Build view column with valid BlobViewStruct values + arrow::LargeBinaryBuilder view_builder; + BlobViewStruct view_struct_0(Identifier("db", "tbl"), /*field_id=*/1, /*row_id=*/0); + auto view_bytes_0 = view_struct_0.Serialize(memory_pool_); + ASSERT_TRUE(view_builder.Append(view_bytes_0->data(), view_bytes_0->size()).ok()); + + BlobViewStruct view_struct_1(Identifier("db", "tbl"), /*field_id=*/1, /*row_id=*/1); + auto view_bytes_1 = view_struct_1.Serialize(memory_pool_); + ASSERT_TRUE(view_builder.Append(view_bytes_1->data(), view_bytes_1->size()).ok()); + + auto view_array = view_builder.Finish().ValueOrDie(); + ASSERT_OK(writer.Write(CreateStructBatch(schema, {int_array, view_array}))); + ASSERT_OK_AND_ASSIGN(auto inc, writer.PrepareCommit(/*wait_compaction=*/true)); + ASSERT_FALSE(inc.GetNewFilesIncrement().NewFiles().empty()); + ASSERT_OK(writer.Close()); +} + +TEST_F(AppendOnlyWriterTest, TestWriteInvalidBlobViewFieldRejected) { + auto options = CreateOptions({{Options::FILE_FORMAT, "orc"}, + {Options::MANIFEST_FORMAT, "orc"}, + {Options::BLOB_VIEW_FIELD, "view"}}); + auto dir = UniqueTestDirectory::Create(); + ASSERT_TRUE(dir); + auto path_factory = CreatePathFactory(dir->Str(), "orc", options); + + auto schema = + arrow::schema({arrow::field("f0", arrow::int32()), BlobUtils::ToArrowField("view", true)}); + AppendOnlyWriter writer(options, /*schema_id=*/0, schema, /*write_cols=*/std::nullopt, + /*max_sequence_number=*/-1, path_factory, compact_manager_, + memory_pool_); + + // Build f0 column + arrow::Int32Builder int_builder; + ASSERT_TRUE(int_builder.Append(1).ok()); + auto int_array = int_builder.Finish().ValueOrDie(); + + // Build view column with raw bytes + arrow::LargeBinaryBuilder view_builder; + ASSERT_TRUE(view_builder.Append("not_a_valid_blob_view_or_descriptor").ok()); + auto view_array = view_builder.Finish().ValueOrDie(); + + ASSERT_NOK_WITH_MSG(writer.Write(CreateStructBatch(schema, {int_array, view_array})), + "BLOB inline field view configured by blob-view-field require values to be " + "a BlobViewStruct."); + ASSERT_OK(writer.Close()); +} + } // namespace paimon::test diff --git a/src/paimon/core/catalog/file_system_catalog.cpp b/src/paimon/core/catalog/file_system_catalog.cpp index 515990daf..db9c2494e 100644 --- a/src/paimon/core/catalog/file_system_catalog.cpp +++ b/src/paimon/core/catalog/file_system_catalog.cpp @@ -117,7 +117,7 @@ std::string FileSystemCatalog::GetDatabaseLocation(const std::string& db_name) c return NewDatabasePath(warehouse_, db_name); } -std::string FileSystemCatalog::GetTableLocation(const Identifier& identifier) const { +Result FileSystemCatalog::GetTableLocation(const Identifier& identifier) const { return NewDataTablePath(warehouse_, identifier); } @@ -156,7 +156,8 @@ Status FileSystemCatalog::CreateTable(const Identifier& identifier, ArrowSchema* return Status::NotImplemented( "create table operation does not support object store file system for now"); } - SchemaManager schema_manager(fs_, NewDataTablePath(warehouse_, identifier)); + PAIMON_ASSIGN_OR_RAISE(std::string table_path, NewDataTablePath(warehouse_, identifier)); + SchemaManager schema_manager(fs_, table_path); PAIMON_ASSIGN_OR_RAISE( std::unique_ptr table_schema, schema_manager.CreateTable(schema, partition_keys, primary_keys, options)); @@ -170,7 +171,8 @@ Result>> FileSystemCatalog::TableSche return Status::NotImplemented( "do not support checking TableSchemaExists for system table."); } - SchemaManager schema_manager(fs_, NewDataTablePath(warehouse_, identifier)); + PAIMON_ASSIGN_OR_RAISE(std::string table_path, NewDataTablePath(warehouse_, identifier)); + SchemaManager schema_manager(fs_, table_path); return schema_manager.Latest(); } @@ -202,10 +204,11 @@ std::string FileSystemCatalog::NewDatabasePath(const std::string& warehouse, return PathUtil::JoinPath(warehouse, db_name + DB_SUFFIX); } -std::string FileSystemCatalog::NewDataTablePath(const std::string& warehouse, - const Identifier& identifier) { +Result FileSystemCatalog::NewDataTablePath(const std::string& warehouse, + const Identifier& identifier) { + PAIMON_ASSIGN_OR_RAISE(std::string data_table_name, identifier.GetDataTableName()); return PathUtil::JoinPath(NewDatabasePath(warehouse, identifier.GetDatabaseName()), - identifier.GetTableName()); + data_table_name); } Result> FileSystemCatalog::ListDatabases() const { @@ -277,9 +280,9 @@ Result> FileSystemCatalog::LoadTableSchema( if (branch) { dynamic_options[Options::BRANCH] = branch.value(); } + PAIMON_ASSIGN_OR_RAISE(std::string table_path, GetTableLocation(data_identifier)); PAIMON_ASSIGN_OR_RAISE(std::shared_ptr system_table, - SystemTableLoader::Load(system_table_name.value(), fs_, - GetTableLocation(data_identifier), + SystemTableLoader::Load(system_table_name.value(), fs_, table_path, latest_schema.value(), dynamic_options)); PAIMON_ASSIGN_OR_RAISE(std::shared_ptr arrow_schema, system_table->ArrowSchema()); @@ -300,7 +303,8 @@ Result> FileSystemCatalog::GetTable(const Identifier& ide return std::make_shared(schema, identifier.GetDatabaseName(), identifier.GetTableName()); } - return Table::Create(fs_, GetTableLocation(identifier), identifier); + PAIMON_ASSIGN_OR_RAISE(std::string table_path, GetTableLocation(identifier)); + return Table::Create(fs_, table_path, identifier); } Status FileSystemCatalog::DropDatabase(const std::string& name, bool ignore_if_not_exists, @@ -375,7 +379,7 @@ Result> FileSystemCatalog::GetTableBranches( Status FileSystemCatalog::DropTableImpl(const Identifier& identifier, const std::vector& external_paths) { - std::string table_path = GetTableLocation(identifier); + PAIMON_ASSIGN_OR_RAISE(std::string table_path, GetTableLocation(identifier)); // Delete external paths first for (const auto& external_path : external_paths) { @@ -395,8 +399,7 @@ Status FileSystemCatalog::DropTable(const Identifier& identifier, bool ignore_if if (is_system_table) { return Status::Invalid(fmt::format("Cannot drop system table {}.", identifier.ToString())); } - - std::string table_path = GetTableLocation(identifier); + PAIMON_ASSIGN_OR_RAISE(std::string table_path, GetTableLocation(identifier)); PAIMON_ASSIGN_OR_RAISE(bool exist, fs_->Exists(table_path)); if (!exist) { if (ignore_if_not_exists) { @@ -482,8 +485,8 @@ Status FileSystemCatalog::RenameTable(const Identifier& from_table, const Identi return Status::Invalid(fmt::format("target table {} already exists", to_table.ToString())); } - std::string from_path = GetTableLocation(from_table); - std::string to_path = GetTableLocation(to_table); + PAIMON_ASSIGN_OR_RAISE(std::string from_path, GetTableLocation(from_table)); + PAIMON_ASSIGN_OR_RAISE(std::string to_path, GetTableLocation(to_table)); PAIMON_RETURN_NOT_OK(fs_->Rename(from_path, to_path)); return Status::OK(); } @@ -512,8 +515,7 @@ Result> FileSystemCatalog::ListSnapshots( if (!exists) { return Status::NotExist(fmt::format("table {} does not exist", identifier.ToString())); } - - auto table_path = GetTableLocation(identifier); + PAIMON_ASSIGN_OR_RAISE(std::string table_path, GetTableLocation(identifier)); SnapshotManager mgr(fs_, table_path, branch); PAIMON_ASSIGN_OR_RAISE(std::vector snapshots, mgr.GetAllSnapshots()); std::sort(snapshots.begin(), snapshots.end(), diff --git a/src/paimon/core/catalog/file_system_catalog.h b/src/paimon/core/catalog/file_system_catalog.h index 0563a8973..b9fbc2b47 100644 --- a/src/paimon/core/catalog/file_system_catalog.h +++ b/src/paimon/core/catalog/file_system_catalog.h @@ -57,7 +57,7 @@ class FileSystemCatalog : public Catalog { Result DatabaseExists(const std::string& db_name) const override; Result TableExists(const Identifier& identifier) const override; std::string GetDatabaseLocation(const std::string& db_name) const override; - std::string GetTableLocation(const Identifier& identifier) const override; + Result GetTableLocation(const Identifier& identifier) const override; Result> LoadTableSchema(const Identifier& identifier) const override; std::string GetRootPath() const override; std::shared_ptr GetFileSystem() const override; @@ -67,7 +67,8 @@ class FileSystemCatalog : public Catalog { private: static std::string NewDatabasePath(const std::string& warehouse, const std::string& db_name); - static std::string NewDataTablePath(const std::string& warehouse, const Identifier& identifier); + static Result NewDataTablePath(const std::string& warehouse, + const Identifier& identifier); static bool IsSystemDatabase(const std::string& db_name); static Result IsSpecifiedSystemTable(const Identifier& identifier); static Result IsSystemTable(const Identifier& identifier); diff --git a/src/paimon/core/catalog/file_system_catalog_test.cpp b/src/paimon/core/catalog/file_system_catalog_test.cpp index fce73d531..79dad5e5c 100644 --- a/src/paimon/core/catalog/file_system_catalog_test.cpp +++ b/src/paimon/core/catalog/file_system_catalog_test.cpp @@ -177,8 +177,8 @@ TEST(FileSystemCatalogTest, TestOptionsSystemTableCatalog) { ASSERT_FALSE(exists); ASSERT_OK_AND_ASSIGN(exists, catalog.TableExists(Identifier("db1", "missing$options"))); ASSERT_FALSE(exists); - ASSERT_EQ(catalog.GetTableLocation(options_identifier), - PathUtil::JoinPath(PathUtil::JoinPath(dir->Str(), "db1.db"), "tbl1$options")); + std::string table_path = + PathUtil::JoinPath(PathUtil::JoinPath(dir->Str(), "db1.db"), "tbl1$options"); ASSERT_OK_AND_ASSIGN(std::shared_ptr system_schema, catalog.LoadTableSchema(options_identifier)); @@ -542,8 +542,8 @@ TEST(FileSystemCatalogTest, TestCreateTableWhileTableExist) { ASSERT_OK(catalog.CreateTable(identifier, &schema, {"f1"}, {}, options, /*ignore_if_exists=*/true)); ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFactory::Get("local", dir->Str(), {})); - ASSERT_OK(fs->Delete( - PathUtil::JoinPath(catalog.GetTableLocation(identifier), "schema/schema-0"))); + ASSERT_OK_AND_ASSIGN(std::string table_path, catalog.GetTableLocation(identifier)); + ASSERT_OK(fs->Delete(PathUtil::JoinPath(table_path, "schema/schema-0"))); ASSERT_OK(catalog.CreateTable(identifier, &schema, {"f1"}, {}, options, /*ignore_if_exists=*/false)); } @@ -610,8 +610,8 @@ TEST(FileSystemCatalogTest, TestValidateTableSchema) { ASSERT_NOK(table_schema->GetFieldType("f4")); ASSERT_OK_AND_ASSIGN(auto fs, FileSystemFactory::Get("local", dir->Str(), {})); - std::string schema_path = - PathUtil::JoinPath(catalog.GetTableLocation(identifier), "schema/schema-0"); + ASSERT_OK_AND_ASSIGN(std::string table_path, catalog.GetTableLocation(identifier)); + std::string schema_path = PathUtil::JoinPath(table_path, "schema/schema-0"); std::string expected_json_schema; ASSERT_OK(fs->ReadFile(schema_path, &expected_json_schema)); diff --git a/src/paimon/core/core_options.cpp b/src/paimon/core/core_options.cpp index 818995706..b941a1a80 100644 --- a/src/paimon/core/core_options.cpp +++ b/src/paimon/core/core_options.cpp @@ -394,6 +394,7 @@ struct CoreOptions::Impl { std::optional scan_fallback_branch; std::optional data_file_external_paths; std::optional blob_external_storage_path; + std::optional blob_view_upstream_warehouse; std::map raw_options; @@ -558,6 +559,9 @@ struct CoreOptions::Impl { PAIMON_RETURN_NOT_OK(parser.ParseList(Options::BLOB_VIEW_FIELD, Options::FIELDS_SEPARATOR, &blob_view_fields, /*need_trim=*/true)); + // Parse blob-view-upstream-warehouse - warehouse path for configured blob view fields + PAIMON_RETURN_NOT_OK( + parser.Parse(Options::BLOB_VIEW_UPSTREAM_WAREHOUSE, &blob_view_upstream_warehouse)); // Parse blob-external-storage-field - descriptor BLOB fields written to external storage PAIMON_RETURN_NOT_OK(parser.ParseList( Options::BLOB_EXTERNAL_STORAGE_FIELD, Options::FIELDS_SEPARATOR, @@ -1423,6 +1427,10 @@ const std::vector& CoreOptions::GetBlobViewFields() const { return impl_->blob_view_fields; } +std::optional CoreOptions::GetBlobViewUpstreamWarehouse() const { + return impl_->blob_view_upstream_warehouse; +} + std::vector CoreOptions::GetBlobInlineFields() const { std::vector blob_inline_fields = impl_->blob_descriptor_fields; blob_inline_fields.insert(blob_inline_fields.end(), impl_->blob_view_fields.begin(), diff --git a/src/paimon/core/core_options.h b/src/paimon/core/core_options.h index 699506413..60f3534a4 100644 --- a/src/paimon/core/core_options.h +++ b/src/paimon/core/core_options.h @@ -183,6 +183,7 @@ class PAIMON_EXPORT CoreOptions { const std::vector& GetBlobFields() const; const std::vector& GetBlobDescriptorFields() const; const std::vector& GetBlobViewFields() const; + std::optional GetBlobViewUpstreamWarehouse() const; std::vector GetBlobInlineFields() const; const std::vector& GetBlobExternalStorageFields() const; std::optional GetBlobExternalStoragePath() const; diff --git a/src/paimon/core/core_options_test.cpp b/src/paimon/core/core_options_test.cpp index 4d8a26884..9f8742b46 100644 --- a/src/paimon/core/core_options_test.cpp +++ b/src/paimon/core/core_options_test.cpp @@ -118,6 +118,7 @@ TEST(CoreOptionsTest, TestDefaultValue) { ASSERT_TRUE(core_options.GetBlobViewFields().empty()); ASSERT_TRUE(core_options.GetBlobInlineFields().empty()); ASSERT_TRUE(core_options.GetBlobExternalStorageFields().empty()); + ASSERT_EQ(std::nullopt, core_options.GetBlobViewUpstreamWarehouse()); ASSERT_EQ(std::nullopt, core_options.GetBlobExternalStoragePath()); ASSERT_TRUE(core_options.LegacyPartitionNameEnabled()); ASSERT_TRUE(core_options.GlobalIndexEnabled()); @@ -223,6 +224,7 @@ TEST(CoreOptionsTest, TestFromMap) { {Options::BLOB_VIEW_FIELD, "blob5"}, {Options::BLOB_EXTERNAL_STORAGE_FIELD, "blob3,blob4"}, {Options::BLOB_EXTERNAL_STORAGE_PATH, "FILE:///tmp/blob_external_storage/"}, + {Options::BLOB_VIEW_UPSTREAM_WAREHOUSE, "FILE:///tmp/blob_view_upstream_warehouse/"}, {Options::PARTITION_GENERATE_LEGACY_NAME, "false"}, {Options::GLOBAL_INDEX_ENABLED, "false"}, {Options::GLOBAL_INDEX_THREAD_NUM, "4"}, @@ -360,6 +362,8 @@ TEST(CoreOptionsTest, TestFromMap) { std::vector({"blob3", "blob4"})); ASSERT_EQ(core_options.GetBlobExternalStoragePath(), std::optional("FILE:///tmp/blob_external_storage/")); + ASSERT_EQ(core_options.GetBlobViewUpstreamWarehouse(), + std::optional("FILE:///tmp/blob_view_upstream_warehouse/")); ASSERT_FALSE(core_options.LegacyPartitionNameEnabled()); ASSERT_FALSE(core_options.GlobalIndexEnabled()); ASSERT_EQ(core_options.GetGlobalIndexThreadNum(), 4); diff --git a/src/paimon/core/global_index/global_index_scan_impl.cpp b/src/paimon/core/global_index/global_index_scan_impl.cpp index de7ce1b83..77cbafb97 100644 --- a/src/paimon/core/global_index/global_index_scan_impl.cpp +++ b/src/paimon/core/global_index/global_index_scan_impl.cpp @@ -103,11 +103,11 @@ Result> GlobalIndexScanImpl::Create( auto final_executor = executor; if (!final_executor) { std::optional thread_num = options.GetGlobalIndexThreadNum(); - if (!thread_num) { - uint32_t cpu_count = std::thread::hardware_concurrency(); - thread_num = cpu_count > 0 ? static_cast(cpu_count) : 1; + if (thread_num) { + final_executor = CreateDefaultExecutor(static_cast(thread_num.value())); + } else { + final_executor = GetGlobalDefaultExecutor(); } - final_executor = CreateDefaultExecutor(static_cast(thread_num.value())); } return std::unique_ptr(new GlobalIndexScanImpl( table_schema, options, path_factory, std::move(index_metas), final_executor, pool)); diff --git a/src/paimon/core/mergetree/compact/merge_tree_compact_manager_test.cpp b/src/paimon/core/mergetree/compact/merge_tree_compact_manager_test.cpp index ef784c177..a824eae57 100644 --- a/src/paimon/core/mergetree/compact/merge_tree_compact_manager_test.cpp +++ b/src/paimon/core/mergetree/compact/merge_tree_compact_manager_test.cpp @@ -48,6 +48,10 @@ class InlineExecutor final : public Executor { } void ShutdownNow() override {} + + uint32_t GetThreadNum() const override { + return 0; + } }; class QueuedExecutor final : public Executor { @@ -58,6 +62,10 @@ class QueuedExecutor final : public Executor { void ShutdownNow() override {} + uint32_t GetThreadNum() const override { + return 0; + } + void RunAll() { for (auto& task : tasks_) { task(); diff --git a/src/paimon/core/operation/data_evolution_split_read.cpp b/src/paimon/core/operation/data_evolution_split_read.cpp index fef2d94b6..4072e80ab 100644 --- a/src/paimon/core/operation/data_evolution_split_read.cpp +++ b/src/paimon/core/operation/data_evolution_split_read.cpp @@ -174,13 +174,17 @@ Result> DataEvolutionSplitRead::WrapWithBlobViewRes CreateBlobViewReader(data_split, read_blob_view_fields)); PAIMON_ASSIGN_OR_RAISE(std::unordered_set blob_view_structs, ExtractBlobViewStructs(pre_reader.get())); - std::string warehouse_path = - PathUtil::GetParentDirPath(PathUtil::GetParentDirPath(context_->GetPath())); - auto catalog_context = std::make_shared(warehouse_path, options_.ToMap(), - options_.GetFileSystem()); - PAIMON_ASSIGN_OR_RAISE( - BlobViewResolver resolver, - BlobViewLookup::CreateResolver(blob_view_structs, catalog_context, pool_)); + std::optional warehouse_path = options_.GetBlobViewUpstreamWarehouse(); + if (!warehouse_path) { + return Status::Invalid( + "invalid config for blob view, supposed to set BLOB_VIEW_UPSTREAM_WAREHOUSE"); + } + auto catalog_context = std::make_shared( + warehouse_path.value(), options_.ToMap(), options_.GetFileSystem()); + // use global thread number + PAIMON_ASSIGN_OR_RAISE(BlobViewResolver resolver, + BlobViewLookup::CreateResolver(blob_view_structs, catalog_context, pool_, + GetGlobalDefaultExecutor())); return std::make_unique( std::move(inner_reader), std::move(read_blob_view_fields), std::move(resolver), pool_); } diff --git a/src/paimon/core/utils/blob_view_lookup.cpp b/src/paimon/core/utils/blob_view_lookup.cpp index dff77a637..b666c78a5 100644 --- a/src/paimon/core/utils/blob_view_lookup.cpp +++ b/src/paimon/core/utils/blob_view_lookup.cpp @@ -17,16 +17,21 @@ #include "paimon/core/utils/blob_view_lookup.h" #include +#include #include +#include #include "arrow/array.h" #include "arrow/c/bridge.h" #include "fmt/format.h" #include "paimon/catalog/catalog.h" #include "paimon/common/data/blob_descriptor.h" +#include "paimon/common/executor/future.h" #include "paimon/common/table/special_fields.h" #include "paimon/common/utils/arrow/status_utils.h" +#include "paimon/common/utils/path_util.h" #include "paimon/defs.h" +#include "paimon/executor.h" #include "paimon/global_index/bitmap_global_index_result.h" #include "paimon/memory/bytes.h" #include "paimon/read_context.h" @@ -47,6 +52,10 @@ void BlobViewLookup::TableReadPlan::Add(const BlobViewStruct& view_struct) { row_ranges_.push_back(view_struct.RowId()); } +const Identifier& BlobViewLookup::TableReadPlan::GetIdentifier() const { + return identifier_; +} + std::vector BlobViewLookup::TableReadPlan::GetFieldIds() const { return std::vector(references_by_field_id_.begin(), references_by_field_id_.end()); } @@ -77,10 +86,10 @@ std::vector BlobViewLookup::TableReadPlan::GetSortedDistinctRanges() cons Result BlobViewLookup::CreateResolver( const std::unordered_set& view_structs, - const std::shared_ptr& catalog_context, - const std::shared_ptr& pool) { + const std::shared_ptr& catalog_context, const std::shared_ptr& pool, + const std::shared_ptr& executor) { PAIMON_ASSIGN_OR_RAISE(DescriptorMapping mapping, - PreloadDescriptors(view_structs, catalog_context, pool)); + PreloadDescriptors(view_structs, catalog_context, pool, executor)); return BlobViewResolver([cached = std::move(mapping)](const BlobViewStruct& view_struct) -> Result> { auto iter = cached.find(view_struct); @@ -94,50 +103,100 @@ Result BlobViewLookup::CreateResolver( Result BlobViewLookup::PreloadDescriptors( const std::unordered_set& view_structs, - const std::shared_ptr& catalog_context, - const std::shared_ptr& pool) { + const std::shared_ptr& catalog_context, const std::shared_ptr& pool, + const std::shared_ptr& executor) { std::unordered_map plan_by_identifier = GroupByIdentifier(view_structs); - PAIMON_ASSIGN_OR_RAISE(std::unique_ptr catalog, - Catalog::Create(catalog_context->root_path, catalog_context->options, - catalog_context->file_system)); - DescriptorMapping mapping; + int64_t target_rows_per_task = TargetRowsPerTask(plan_by_identifier, executor->GetThreadNum()); + + std::vector>> futures; for (const auto& [identifier, table_read_plan] : plan_by_identifier) { - std::string source_table_path = catalog->GetTableLocation(identifier); - PAIMON_ASSIGN_OR_RAISE(std::optional branch, identifier.GetBranchName()); - ScanContextBuilder scan_builder(source_table_path); - auto global_index_result = - BitmapGlobalIndexResult::FromRanges(table_read_plan.GetSortedDistinctRanges()); - scan_builder.SetGlobalIndexResult(global_index_result) - .WithMemoryPool(pool) - .WithFileSystem(catalog_context->file_system); - if (branch) { - scan_builder.AddOption(Options::BRANCH, branch.value()); - } - PAIMON_ASSIGN_OR_RAISE(std::unique_ptr scan_context, scan_builder.Finish()); - PAIMON_ASSIGN_OR_RAISE(std::unique_ptr table_scan, - TableScan::Create(std::move(scan_context))); - PAIMON_ASSIGN_OR_RAISE(std::shared_ptr plan, table_scan->CreatePlan()); - - ReadContextBuilder read_builder(source_table_path); std::vector field_ids = table_read_plan.GetFieldIds(); - field_ids.push_back(SpecialFieldIds::ROW_ID); - read_builder.SetReadFieldIds(field_ids) - .AddOption(Options::BLOB_AS_DESCRIPTOR, "true") - .EnablePrefetch(true) - .WithMemoryPool(pool) - .WithFileSystem(catalog_context->file_system); - if (branch) { - read_builder.WithBranch(branch.value()); - } - PAIMON_ASSIGN_OR_RAISE(std::unique_ptr read_context, read_builder.Finish()); - PAIMON_ASSIGN_OR_RAISE(std::unique_ptr table_read, - TableRead::Create(std::move(read_context))); - PAIMON_ASSIGN_OR_RAISE(std::unique_ptr reader, - table_read->CreateReader(plan->Splits())); - PAIMON_RETURN_NOT_OK( - ExtractBlobDescriptors(identifier, field_ids, pool, reader.get(), &mapping)); + std::vector> range_chunks = + SplitRowRanges(table_read_plan.GetSortedDistinctRanges(), target_rows_per_task); + for (const auto& range_chunk : range_chunks) { + futures.push_back(Via(executor.get(), + [catalog_context, identifier, field_ids, range_chunk, + pool]() -> Result { + return LoadTableDescriptorChunk(catalog_context, identifier, + field_ids, range_chunk, pool); + })); + } + } + + DescriptorMapping mapping; + std::vector> chunk_results = CollectAll(futures); + for (auto& chunk_result : chunk_results) { + if (!chunk_result.ok()) { + return chunk_result.status(); + } + for (const auto& [view_struct, descriptor] : chunk_result.value()) { + mapping[view_struct] = descriptor; + } + } + return mapping; +} + +Result BlobViewLookup::LoadTableDescriptorChunk( + const std::shared_ptr& catalog_context, const Identifier& identifier, + const std::vector& field_ids, const std::vector& row_ranges, + const std::shared_ptr& pool) { + auto file_system = catalog_context->file_system; + PAIMON_ASSIGN_OR_RAISE( + std::unique_ptr catalog, + Catalog::Create(catalog_context->root_path, catalog_context->options, file_system)); + + PAIMON_ASSIGN_OR_RAISE(std::string source_table_path, catalog->GetTableLocation(identifier)); + + std::string database_name = identifier.GetDatabaseName(); + PAIMON_ASSIGN_OR_RAISE(std::string data_table_name, identifier.GetDataTableName()); + std::string fallback_source_table_path = PathUtil::JoinPath( + PathUtil::JoinPath(catalog_context->root_path, database_name), data_table_name); + + PAIMON_ASSIGN_OR_RAISE(bool exist, file_system->Exists(source_table_path)); + PAIMON_ASSIGN_OR_RAISE(bool fallback_exist, file_system->Exists(fallback_source_table_path)); + + if (exist == fallback_exist) { + return Status::Invalid( + fmt::format("Ambiguous table path: both table path {} and fallback table path {} are " + "present or absent", + source_table_path, fallback_source_table_path)); + } + std::string final_table_path = exist ? source_table_path : fallback_source_table_path; + PAIMON_ASSIGN_OR_RAISE(std::optional branch, identifier.GetBranchName()); + ScanContextBuilder scan_builder(final_table_path); + auto global_index_result = BitmapGlobalIndexResult::FromRanges(row_ranges); + scan_builder.SetGlobalIndexResult(global_index_result) + .WithMemoryPool(pool) + .WithFileSystem(file_system); + if (branch) { + scan_builder.AddOption(Options::BRANCH, branch.value()); } + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr scan_context, scan_builder.Finish()); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr table_scan, + TableScan::Create(std::move(scan_context))); + PAIMON_ASSIGN_OR_RAISE(std::shared_ptr plan, table_scan->CreatePlan()); + + ReadContextBuilder read_builder(final_table_path); + std::vector read_field_ids = field_ids; + read_field_ids.push_back(SpecialFieldIds::ROW_ID); + read_builder.SetReadFieldIds(read_field_ids) + .AddOption(Options::BLOB_AS_DESCRIPTOR, "true") + .EnablePrefetch(true) + .WithMemoryPool(pool) + .WithFileSystem(file_system); + if (branch) { + read_builder.WithBranch(branch.value()); + } + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr read_context, read_builder.Finish()); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr table_read, + TableRead::Create(std::move(read_context))); + PAIMON_ASSIGN_OR_RAISE(std::unique_ptr reader, + table_read->CreateReader(plan->Splits())); + + DescriptorMapping mapping; + PAIMON_RETURN_NOT_OK( + ExtractBlobDescriptors(identifier, read_field_ids, pool, reader.get(), &mapping)); return mapping; } @@ -238,4 +297,46 @@ std::unordered_map BlobViewLookup::Gr return grouped; } +int64_t BlobViewLookup::TargetRowsPerTask( + const std::unordered_map& plan_by_identifier, uint32_t thread_num) { + int64_t total_rows = 0; + for (const auto& [identifier, table_read_plan] : plan_by_identifier) { + for (const auto& row_range : table_read_plan.GetSortedDistinctRanges()) { + total_rows += row_range.Count(); + } + } + int64_t balanced_rows = (total_rows + thread_num - 1) / thread_num; + return std::max(MIN_ROW_PER_TASK, balanced_rows); +} + +std::vector> BlobViewLookup::SplitRowRanges(const std::vector& row_ranges, + int64_t target_rows_per_task) { + if (row_ranges.empty()) { + return {}; + } + std::vector> chunks; + std::vector current_chunk; + int64_t current_chunk_rows = 0; + for (const auto& row_range : row_ranges) { + int64_t next_from = row_range.from; + while (next_from <= row_range.to) { + if (current_chunk_rows == target_rows_per_task) { + chunks.push_back(current_chunk); + current_chunk.clear(); + current_chunk_rows = 0; + } + + int64_t remaining_rows = target_rows_per_task - current_chunk_rows; + int64_t next_to = std::min(row_range.to, next_from + remaining_rows - 1); + current_chunk.emplace_back(next_from, next_to); + current_chunk_rows += next_to - next_from + 1; + next_from = next_to + 1; + } + } + if (!current_chunk.empty()) { + chunks.push_back(current_chunk); + } + return chunks; +} + } // namespace paimon diff --git a/src/paimon/core/utils/blob_view_lookup.h b/src/paimon/core/utils/blob_view_lookup.h index 3b3b3d026..22c899144 100644 --- a/src/paimon/core/utils/blob_view_lookup.h +++ b/src/paimon/core/utils/blob_view_lookup.h @@ -39,6 +39,8 @@ class BatchReader; class BlobViewLookup { public: using DescriptorMapping = std::unordered_map>; + /// The minimum number of rows handled by a single parallel task. + static constexpr int64_t MIN_ROW_PER_TASK = 100; BlobViewLookup() = delete; ~BlobViewLookup() = delete; @@ -46,7 +48,7 @@ class BlobViewLookup { static Result CreateResolver( const std::unordered_set& view_structs, const std::shared_ptr& catalog_context, - const std::shared_ptr& pool); + const std::shared_ptr& pool, const std::shared_ptr& executor); private: class TableReadPlan { @@ -54,6 +56,7 @@ class BlobViewLookup { explicit TableReadPlan(const BlobViewStruct& view_struct); void Add(const BlobViewStruct& view_struct); + const Identifier& GetIdentifier() const; std::vector GetFieldIds() const; std::vector GetSortedDistinctRanges() const; @@ -66,6 +69,11 @@ class BlobViewLookup { static Result PreloadDescriptors( const std::unordered_set& view_structs, const std::shared_ptr& catalog_context, + const std::shared_ptr& pool, const std::shared_ptr& executor); + + static Result LoadTableDescriptorChunk( + const std::shared_ptr& catalog_context, const Identifier& identifier, + const std::vector& field_ids, const std::vector& row_ranges, const std::shared_ptr& pool); static Status ExtractBlobDescriptors(const Identifier& identifier, @@ -75,6 +83,13 @@ class BlobViewLookup { static std::unordered_map GroupByIdentifier( const std::unordered_set& view_structs); + + static int64_t TargetRowsPerTask( + const std::unordered_map& plan_by_identifier, + uint32_t thread_num); + + static std::vector> SplitRowRanges(const std::vector& row_ranges, + int64_t target_rows_per_task); }; } // namespace paimon diff --git a/src/paimon/core/utils/blob_view_lookup_test.cpp b/src/paimon/core/utils/blob_view_lookup_test.cpp index d5cbf7ab3..69c369200 100644 --- a/src/paimon/core/utils/blob_view_lookup_test.cpp +++ b/src/paimon/core/utils/blob_view_lookup_test.cpp @@ -103,10 +103,8 @@ TEST_F(BlobViewLookupTest, TestGetSortedDistinctRangesMergesContiguousAndGaps) { auto ranges = plan.GetSortedDistinctRanges(); ASSERT_EQ(ranges.size(), 2U); - ASSERT_EQ(ranges[0].from, 5); - ASSERT_EQ(ranges[0].to, 7); - ASSERT_EQ(ranges[1].from, 10); - ASSERT_EQ(ranges[1].to, 11); + ASSERT_EQ(ranges[0], Range(5, 7)); + ASSERT_EQ(ranges[1], Range(10, 11)); } TEST_F(BlobViewLookupTest, TestGetSortedDistinctRangesWithNonContiguous) { @@ -116,12 +114,9 @@ TEST_F(BlobViewLookupTest, TestGetSortedDistinctRangesWithNonContiguous) { const auto ranges = plan.GetSortedDistinctRanges(); ASSERT_EQ(ranges.size(), 3U); - ASSERT_EQ(ranges[0].from, 1); - ASSERT_EQ(ranges[0].to, 1); - ASSERT_EQ(ranges[1].from, 50); - ASSERT_EQ(ranges[1].to, 50); - ASSERT_EQ(ranges[2].from, 100); - ASSERT_EQ(ranges[2].to, 100); + ASSERT_EQ(ranges[0], Range(1, 1)); + ASSERT_EQ(ranges[1], Range(50, 50)); + ASSERT_EQ(ranges[2], Range(100, 100)); } TEST_F(BlobViewLookupTest, TestEmptyInputProducesEmptyOutput) { @@ -188,4 +183,70 @@ TEST_F(BlobViewLookupTest, TestViewStructsOfDifferentTablesAreSplitIntoDistinctP ASSERT_EQ(plan_db2_t1.row_ranges_.size(), 1U); } +TEST_F(BlobViewLookupTest, TestGetIdentifier) { + BlobViewLookup::TableReadPlan plan(MakeView("db", "t", /*field_id=*/1, /*row_id=*/0)); + ASSERT_EQ(plan.GetIdentifier(), MakeIdentifier("db", "t")); +} + +TEST_F(BlobViewLookupTest, TestTargetRowsPerTaskEmptyReturnsMin) { + std::unordered_map empty; + ASSERT_EQ(BlobViewLookup::TargetRowsPerTask(empty, /*thread_num=*/100), + BlobViewLookup::MIN_ROW_PER_TASK); +} + +TEST_F(BlobViewLookupTest, TestTargetRowsPerTaskSmallTotalReturnsMin) { + std::unordered_set views; + for (int64_t row_id = 0; row_id < 10; ++row_id) { + views.emplace(MakeView("db", "t", /*field_id=*/1, row_id)); + } + auto grouped = BlobViewLookup::GroupByIdentifier(views); + // total_rows (10) is far below thread_num, so the balanced budget is clamped to + // MIN_ROW_PER_TASK. + ASSERT_EQ(BlobViewLookup::TargetRowsPerTask(grouped, /*thread_num=*/100), + BlobViewLookup::MIN_ROW_PER_TASK); +} + +TEST_F(BlobViewLookupTest, TestTargetRowsPerTaskLargeTotalBalancesAcrossThreads) { + std::unordered_set views; + const int64_t total_rows = 100001; + for (int64_t row_id = 0; row_id < total_rows; ++row_id) { + views.emplace(MakeView("db", "t", /*field_id=*/1, row_id)); + } + auto grouped = BlobViewLookup::GroupByIdentifier(views); + // ceil(100001 / 100) = 1001 + ASSERT_EQ(BlobViewLookup::TargetRowsPerTask(grouped, /*thread_num=*/100), 1001); +} + +TEST_F(BlobViewLookupTest, TestSplitRowRangesEmptyInput) { + auto chunks = BlobViewLookup::SplitRowRanges({}, /*target_rows_per_task=*/10); + ASSERT_TRUE(chunks.empty()); +} + +TEST_F(BlobViewLookupTest, TestSplitRowRangesSingleRangeFitsInOneChunk) { + auto chunks = BlobViewLookup::SplitRowRanges({Range(0, 4)}, /*target_rows_per_task=*/10); + ASSERT_EQ(chunks.size(), 1U); + ASSERT_EQ(chunks[0].size(), 1U); + ASSERT_EQ(chunks[0][0], Range(0, 4)); +} + +TEST_F(BlobViewLookupTest, TestSplitRowRangesSplitsLargeRange) { + // [0, 9] with target 4 => [0,3], [4,7], [8,9] + auto chunks = BlobViewLookup::SplitRowRanges({Range(0, 9)}, /*target_rows_per_task=*/4); + ASSERT_EQ(chunks.size(), 3U); + ASSERT_EQ(chunks[0], (std::vector{Range(0, 3)})); + ASSERT_EQ(chunks[1], (std::vector{Range(4, 7)})); + ASSERT_EQ(chunks[2], (std::vector{Range(8, 9)})); +} + +TEST_F(BlobViewLookupTest, TestSplitRowRangesPacksAcrossRanges) { + // Ranges [0,2] (3 rows) and [10,12] (3 rows) with target 4. + // chunk0 = [0,2] + part of second range [10,10] (total 4 rows) + // chunk1 = [11,12] (2 rows) + auto chunks = + BlobViewLookup::SplitRowRanges({Range(0, 2), Range(10, 12)}, /*target_rows_per_task=*/4); + ASSERT_EQ(chunks.size(), 2U); + ASSERT_EQ(chunks[0], (std::vector{Range(0, 2), Range(10, 10)})); + ASSERT_EQ(chunks[1], (std::vector{Range(11, 12)})); +} + } // namespace paimon::test diff --git a/test/inte/blob_table_inte_test.cpp b/test/inte/blob_table_inte_test.cpp index 507b31bc9..0a62111bb 100644 --- a/test/inte/blob_table_inte_test.cpp +++ b/test/inte/blob_table_inte_test.cpp @@ -16,6 +16,8 @@ #include #include +#include +#include #include #include #include @@ -1270,7 +1272,7 @@ TEST_P(BlobTableInteTest, TestDataEvolutionAndAlterTable) { DataField(3, arrow::field("f1", arrow::utf8())), DataField(4, arrow::field("f2", arrow::decimal128(6, 3))), DataField(5, arrow::field("f0", arrow::boolean())), - DataField(8, BlobUtils::ToArrowField("f5")), + DataField(8, BlobUtils::ToArrowField("blob")), DataField(9, arrow::field("f6", arrow::int32())), SpecialFields::RowId(), SpecialFields::SequenceNumber()}; @@ -1282,7 +1284,7 @@ TEST_P(BlobTableInteTest, TestDataEvolutionAndAlterTable) { // only read blob column auto expected_array = std::dynamic_pointer_cast( arrow::ipc::internal::json::ArrayFromJSON( - arrow::struct_({BlobUtils::ToArrowField("f5")}), R"([ + arrow::struct_({BlobUtils::ToArrowField("blob")}), R"([ ["Lily"], ["Alice"], ["Bob"], @@ -1295,7 +1297,7 @@ TEST_P(BlobTableInteTest, TestDataEvolutionAndAlterTable) { ["Elderberry"] ])") .ValueOrDie()); - ASSERT_OK(ScanAndRead(table_path, {"f5"}, expected_array)); + ASSERT_OK(ScanAndRead(table_path, {"blob"}, expected_array)); } { auto expected_array = std::dynamic_pointer_cast( @@ -2380,9 +2382,8 @@ TEST_P(BlobTableInteTest, TestBlobDescriptorFieldWriteRawBytesDirectly) { auto schema = arrow::schema(fields); ASSERT_NOK_WITH_MSG(WriteArray(table_path, {}, schema->field_names(), {raw_array}), - "BLOB inline field b0 configured by blob-descriptor-field or " - "blob-view-field require values " - "to be a BlobDescriptor or BlobViewStruct."); + "BLOB inline field b0 configured by blob-descriptor-field require values " + "to be a BlobDescriptor."); } TEST_P(BlobTableInteTest, TestBlobViewFieldWithUpstreamTable) { @@ -2401,13 +2402,15 @@ TEST_P(BlobTableInteTest, TestBlobViewFieldWithUpstreamTable) { arrow::FieldVector fields = {arrow::field("f0", arrow::int32()), BlobUtils::ToArrowField("view", true)}; - std::map options = {{Options::MANIFEST_FORMAT, "orc"}, - {Options::FILE_FORMAT, file_format}, - {Options::BUCKET, "-1"}, - {Options::ROW_TRACKING_ENABLED, "true"}, - {Options::DATA_EVOLUTION_ENABLED, "true"}, - {Options::BLOB_VIEW_FIELD, "view"}, - {Options::FILE_SYSTEM, "local"}}; + std::map options = { + {Options::MANIFEST_FORMAT, "orc"}, + {Options::FILE_FORMAT, file_format}, + {Options::BUCKET, "-1"}, + {Options::ROW_TRACKING_ENABLED, "true"}, + {Options::DATA_EVOLUTION_ENABLED, "true"}, + {Options::BLOB_VIEW_FIELD, "view"}, + {Options::BLOB_VIEW_UPSTREAM_WAREHOUSE, dir_->Str()}, + {Options::FILE_SYSTEM, "local"}}; CreateTable(fields, /*partition_keys=*/{}, options); std::string table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); @@ -2547,4 +2550,603 @@ TEST_P(BlobTableInteTest, TestBlobViewFieldWithUpstreamTable) { } } +TEST_P(BlobTableInteTest, TestBlobViewFieldWithUpstreamExternalStorageBlob) { + auto file_format = GetParam(); + + // Upstream table has two blob descriptor fields: b0 (field_id=1, inline descriptor) and + // b1 (field_id=2, descriptor + external storage). The downstream view references cells from + // both b0 and b1. + const std::string upstream_db_name = "upstream_two_blob"; + const std::string upstream_table_name = "upstream_two_blob"; + arrow::FieldVector upstream_fields = {arrow::field("f0", arrow::int32()), + BlobUtils::ToArrowField("b0", true), + BlobUtils::ToArrowField("b1", true)}; + auto upstream_schema = arrow::schema(upstream_fields); + std::map upstream_options = { + {Options::MANIFEST_FORMAT, "orc"}, + {Options::FILE_FORMAT, file_format}, + {Options::BUCKET, "-1"}, + {Options::ROW_TRACKING_ENABLED, "true"}, + {Options::DATA_EVOLUTION_ENABLED, "true"}, + {Options::BLOB_DESCRIPTOR_FIELD, "b0,b1"}, + {Options::BLOB_EXTERNAL_STORAGE_FIELD, "b1"}, + {Options::BLOB_EXTERNAL_STORAGE_PATH, blob_dir_->Str()}, + {Options::FILE_SYSTEM, "local"}}; + + ::ArrowSchema upstream_c_schema; + ASSERT_TRUE(arrow::ExportSchema(*upstream_schema, &upstream_c_schema).ok()); + ASSERT_OK_AND_ASSIGN(auto upstream_catalog, + Catalog::Create(dir_->Str(), {{Options::FILE_SYSTEM, "local"}})); + ASSERT_OK(upstream_catalog->CreateDatabase(upstream_db_name, {}, /*ignore_if_exists=*/true)); + ASSERT_OK(upstream_catalog->CreateTable( + Identifier(upstream_db_name, upstream_table_name), &upstream_c_schema, + /*partition_keys=*/{}, /*primary_keys=*/{}, upstream_options, + /*ignore_if_exists=*/false)); + std::string upstream_table_path = + PathUtil::JoinPath(dir_->Str(), upstream_db_name + ".db/" + upstream_table_name); + + // Write 4 rows of b0/b1 data into the upstream table. + std::string upstream_raw_json = R"([ +[0, "b0_data_0", "b1_data_0"], +[1, "b0_data_1", "b1_data_1"], +[2, "b0_data_2", "b1_data_2"], +[3, "b0_data_3", "b1_data_3"] +])"; + auto upstream_raw_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(upstream_fields), + upstream_raw_json) + .ValueOrDie()); + ASSERT_OK_AND_ASSIGN(auto upstream_desc_array, + ConvertRawBlobToDescriptor(upstream_raw_array, {"b0", "b1"})); + ASSERT_OK_AND_ASSIGN( + auto upstream_commit_msgs, + WriteArray(upstream_table_path, {}, upstream_schema->field_names(), {upstream_desc_array})); + ASSERT_OK(Commit(upstream_table_path, upstream_commit_msgs)); + + // Create the downstream blob-view table that references both b0 and b1 of the upstream table. + arrow::FieldVector fields = {arrow::field("f0", arrow::int32()), + BlobUtils::ToArrowField("view", true)}; + std::map options = { + {Options::MANIFEST_FORMAT, "orc"}, + {Options::FILE_FORMAT, file_format}, + {Options::BUCKET, "-1"}, + {Options::ROW_TRACKING_ENABLED, "true"}, + {Options::DATA_EVOLUTION_ENABLED, "true"}, + {Options::BLOB_VIEW_FIELD, "view"}, + {Options::BLOB_VIEW_UPSTREAM_WAREHOUSE, dir_->Str()}, + {Options::FILE_SYSTEM, "local"}}; + CreateTable(fields, /*partition_keys=*/{}, options); + std::string table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); + + // Build the view column mixing references to b0 (field_id=1) and b1 (field_id=2). + // - row 0: b0 row 0 -> "b0_data_0" + // - row 1: b1 row 1 -> "b1_data_1" + // - row 2: b0 row 2 -> "b0_data_2" + // - row 3: b1 row 3 -> "b1_data_3" + Identifier upstream_identifier(upstream_db_name, upstream_table_name); + auto append_view = [&](arrow::LargeBinaryBuilder* builder, int32_t field_id, int64_t row_id) { + BlobViewStruct view_struct(upstream_identifier, field_id, row_id); + auto serialized = view_struct.Serialize(GetDefaultPool()); + ASSERT_TRUE( + builder + ->Append(reinterpret_cast(serialized->data()), serialized->size()) + .ok()); + }; + arrow::LargeBinaryBuilder view_builder; + append_view(&view_builder, /*field_id=*/1, /*row_id=*/0); + append_view(&view_builder, /*field_id=*/2, /*row_id=*/1); + append_view(&view_builder, /*field_id=*/1, /*row_id=*/2); + append_view(&view_builder, /*field_id=*/2, /*row_id=*/3); + std::shared_ptr write_view_array; + ASSERT_TRUE(view_builder.Finish(&write_view_array).ok()); + + auto write_f0_array = + arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), R"([100,101,102,103])") + .ValueOrDie(); + auto write_struct = std::dynamic_pointer_cast( + arrow::StructArray::Make(arrow::ArrayVector({write_f0_array, write_view_array}), + std::vector({"f0", "view"})) + .ValueOrDie()); + + auto schema = arrow::schema(fields); + ASSERT_OK_AND_ASSIGN(auto commit_msgs, + WriteArray(table_path, {}, schema->field_names(), {write_struct})); + ASSERT_OK(Commit(table_path, commit_msgs)); + + // Scan/read the downstream table and verify the resolved view blobs. + ASSERT_OK_AND_ASSIGN(auto plan, ScanTable(table_path)); + ASSERT_OK_AND_ASSIGN(auto result, + ReadTable(table_path, schema->field_names(), plan, /*predicate=*/nullptr)); + ASSERT_TRUE(result.chunked_array); + auto read_concat = arrow::Concatenate(result.chunked_array->chunks()).ValueOrDie(); + auto read_struct = std::dynamic_pointer_cast(read_concat); + ASSERT_EQ(read_struct->length(), 4); + ASSERT_OK_AND_ASSIGN(auto result_array, ConvertDescriptorToRawBlob(read_struct, {"view"})); + + std::string expected_json = R"([ +[100, "b0_data_0"], +[101, "b1_data_1"], +[102, "b0_data_2"], +[103, "b1_data_3"] +])"; + auto expected_struct = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), expected_json) + .ValueOrDie()); + ASSERT_OK_AND_ASSIGN(auto expected_with_rk, PrependRowKindColumn(expected_struct)); + ASSERT_TRUE(result_array->Equals(expected_with_rk)) + << "result_array:" << result_array->ToString() << std::endl + << "expected:" << expected_with_rk->ToString(); +} + +TEST_P(BlobTableInteTest, TestBlobViewFieldWithMultipleUpstreamTables) { + auto file_format = GetParam(); + if (file_format != "orc" && file_format != "parquet") { + return; + } + + // Upstream table 1: append_table_with_multi_blob, with two blob fields f5 (field_id=5) and + // f6 (field_id=6). + const std::string multi_blob_db_name = "append_table_with_multi_blob"; + const std::string multi_blob_table_name = "append_table_with_multi_blob"; + { + std::string src_db_path = paimon::test::GetDataDir() + file_format + "/" + + multi_blob_db_name + ".db/" + multi_blob_table_name; + std::string dst_db_path = + PathUtil::JoinPath(dir_->Str(), multi_blob_db_name + ".db/" + multi_blob_table_name); + ASSERT_TRUE(TestUtil::CopyDirectory(src_db_path, dst_db_path)); + } + + // Upstream table 2: blob_append_table_alter_table_with_cast_with_data_evolution, with one blob + // field blob (field_id=8). + const std::string alter_db_name = "blob_append_table_alter_table_with_cast_with_data_evolution"; + const std::string alter_table_name = + "blob_append_table_alter_table_with_cast_with_data_evolution"; + { + std::string src_db_path = paimon::test::GetDataDir() + file_format + "/" + alter_db_name + + ".db/" + alter_table_name; + std::string dst_db_path = + PathUtil::JoinPath(dir_->Str(), alter_db_name + ".db/" + alter_table_name); + ASSERT_TRUE(TestUtil::CopyDirectory(src_db_path, dst_db_path)); + } + + arrow::FieldVector fields = {arrow::field("f0", arrow::int32()), + BlobUtils::ToArrowField("view1", true), + BlobUtils::ToArrowField("view2", true)}; + std::map options = { + {Options::MANIFEST_FORMAT, "orc"}, + {Options::FILE_FORMAT, file_format}, + {Options::BUCKET, "-1"}, + {Options::ROW_TRACKING_ENABLED, "true"}, + {Options::DATA_EVOLUTION_ENABLED, "true"}, + {Options::BLOB_VIEW_FIELD, "view1,view2"}, + {Options::BLOB_VIEW_UPSTREAM_WAREHOUSE, dir_->Str()}, + {Options::FILE_SYSTEM, "local"}}; + CreateTable(fields, /*partition_keys=*/{}, options); + std::string table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); + + Identifier multi_blob_identifier(multi_blob_db_name, multi_blob_table_name); + Identifier alter_identifier(alter_db_name, alter_table_name); + + auto append_view = [&](arrow::LargeBinaryBuilder* builder, const Identifier& identifier, + int32_t field_id, int64_t row_id) { + BlobViewStruct view_struct(identifier, field_id, row_id); + auto serialized = view_struct.Serialize(GetDefaultPool()); + ASSERT_TRUE( + builder + ->Append(reinterpret_cast(serialized->data()), serialized->size()) + .ok()); + }; + + // Build view1 column. References multi_blob.f5 (field_id=5) and f6 (field_id=6). + // Some upstream cells are referenced more than once on purpose. + // - row 0: f5 row 3 -> 'D' * 1024 + // - row 1: f6 row 1 -> 'b' * 2048 + // - row 2: f5 row 3 -> 'D' * 1024 (repeat of row 0) + // - row 3: f6 row 4 -> 'e' * 2048 + // - row 4: f5 row 3 -> 'D' * 1024 (repeat of row 0) + // - row 5: f6 row 1 -> 'b' * 2048 (repeat of row 1) + // - row 6: f5 row 5 -> 'F' * 1024 + // - row 7: f6 row 6 -> 'g' * 2048 + arrow::LargeBinaryBuilder view1_builder; + append_view(&view1_builder, multi_blob_identifier, /*field_id=*/5, /*row_id=*/3); + append_view(&view1_builder, multi_blob_identifier, /*field_id=*/6, /*row_id=*/1); + append_view(&view1_builder, multi_blob_identifier, /*field_id=*/5, /*row_id=*/3); + append_view(&view1_builder, multi_blob_identifier, /*field_id=*/6, /*row_id=*/4); + append_view(&view1_builder, multi_blob_identifier, /*field_id=*/5, /*row_id=*/3); + append_view(&view1_builder, multi_blob_identifier, /*field_id=*/6, /*row_id=*/1); + append_view(&view1_builder, multi_blob_identifier, /*field_id=*/5, /*row_id=*/5); + append_view(&view1_builder, multi_blob_identifier, /*field_id=*/6, /*row_id=*/6); + std::shared_ptr write_view1_array; + ASSERT_TRUE(view1_builder.Finish(&write_view1_array).ok()); + + // Build view2 column. References alter.blob (field_id=8). + // Some upstream cells are referenced more than once on purpose. + // - row 0: blob row 0 -> "Lily" + // - row 1: blob row 5 -> "Apple" + // - row 2: blob row 0 -> "Lily" (repeat of row 0) + // - row 3: blob row 2 -> "Bob" + // - row 4: blob row 5 -> "Apple" (repeat of row 1) + // - row 5: blob row 9 -> "Elderberry" + // - row 6: blob row 0 -> "Lily" (repeat of row 0) + // - row 7: blob row 3 -> "Cindy" + arrow::LargeBinaryBuilder view2_builder; + append_view(&view2_builder, alter_identifier, /*field_id=*/8, /*row_id=*/0); + append_view(&view2_builder, alter_identifier, /*field_id=*/8, /*row_id=*/5); + append_view(&view2_builder, alter_identifier, /*field_id=*/8, /*row_id=*/0); + append_view(&view2_builder, alter_identifier, /*field_id=*/8, /*row_id=*/2); + append_view(&view2_builder, alter_identifier, /*field_id=*/8, /*row_id=*/5); + append_view(&view2_builder, alter_identifier, /*field_id=*/8, /*row_id=*/9); + append_view(&view2_builder, alter_identifier, /*field_id=*/8, /*row_id=*/0); + append_view(&view2_builder, alter_identifier, /*field_id=*/8, /*row_id=*/3); + std::shared_ptr write_view2_array; + ASSERT_TRUE(view2_builder.Finish(&write_view2_array).ok()); + + auto write_f0_array = arrow::ipc::internal::json::ArrayFromJSON( + arrow::int32(), R"([100,101,102,103,104,105,106,107])") + .ValueOrDie(); + auto write_struct = std::dynamic_pointer_cast( + arrow::StructArray::Make( + arrow::ArrayVector({write_f0_array, write_view1_array, write_view2_array}), + std::vector({"f0", "view1", "view2"})) + .ValueOrDie()); + + // write & commit + auto schema = arrow::schema(fields); + ASSERT_OK_AND_ASSIGN(auto commit_msgs, + WriteArray(table_path, {}, schema->field_names(), {write_struct})); + ASSERT_OK(Commit(table_path, commit_msgs)); + + // Expected blob contents per referenced upstream cell. + std::string blob_f5_row3(1024, 'D'); // multi_blob.f5 row 3 + std::string blob_f5_row5(1024, 'F'); // multi_blob.f5 row 5 + std::string blob_f6_row1(2048, 'b'); // multi_blob.f6 row 1 + std::string blob_f6_row4(2048, 'e'); // multi_blob.f6 row 4 + std::string blob_f6_row6(2048, 'g'); // multi_blob.f6 row 6 + + std::vector read_fields = {"view2", "view1", "f0"}; + ASSERT_OK_AND_ASSIGN(auto plan, ScanTable(table_path)); + ASSERT_OK_AND_ASSIGN(auto result, + ReadTable(table_path, read_fields, plan, /*predicate=*/nullptr)); + ASSERT_TRUE(result.chunked_array); + auto read_concat = arrow::Concatenate(result.chunked_array->chunks()).ValueOrDie(); + auto read_struct = std::dynamic_pointer_cast(read_concat); + ASSERT_EQ(read_struct->length(), 8); + ASSERT_OK_AND_ASSIGN(auto result_array, + ConvertDescriptorToRawBlob(read_struct, {"view1", "view2"})); + + // Expected struct follows the requested (shuffled) column order: view2, view1, f0. + arrow::FieldVector expected_fields = {BlobUtils::ToArrowField("view2", true), + BlobUtils::ToArrowField("view1", true), + arrow::field("f0", arrow::int32())}; + // clang-format off + std::string expected_json = R"([ +["Lily", ")" + blob_f5_row3 + R"(", 100], +["Apple", ")" + blob_f6_row1 + R"(", 101], +["Lily", ")" + blob_f5_row3 + R"(", 102], +["Bob", ")" + blob_f6_row4 + R"(", 103], +["Apple", ")" + blob_f5_row3 + R"(", 104], +["Elderberry", ")" + blob_f6_row1 + R"(", 105], +["Lily", ")" + blob_f5_row5 + R"(", 106], +["Cindy", ")" + blob_f6_row6 + R"(", 107] +])"; + // clang-format on + auto expected_struct = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(expected_fields), expected_json) + .ValueOrDie()); + ASSERT_OK_AND_ASSIGN(auto expected_with_rk, PrependRowKindColumn(expected_struct)); + + ASSERT_TRUE(result_array->Equals(expected_with_rk)) + << "result_array:" << result_array->ToString() << std::endl + << "expected:" << expected_with_rk->ToString(); +} + +TEST_P(BlobTableInteTest, TestBlobViewFieldWithBranchUpstreamTable) { + auto file_format = GetParam(); + const std::string upstream_db_name = "branch_upstream"; + const std::string upstream_table_name = "branch_upstream"; + arrow::FieldVector upstream_fields = {arrow::field("f0", arrow::int32()), + arrow::field("f1", arrow::int32()), + BlobUtils::ToArrowField("blob", true)}; + auto upstream_schema = arrow::schema(upstream_fields); + std::map upstream_options = { + {Options::MANIFEST_FORMAT, "orc"}, + {Options::FILE_FORMAT, file_format}, + {Options::BUCKET, "-1"}, + {Options::ROW_TRACKING_ENABLED, "true"}, + {Options::DATA_EVOLUTION_ENABLED, "true"}, + {Options::BLOB_AS_DESCRIPTOR, "true"}, + {Options::FILE_SYSTEM, "local"}}; + + auto build_upstream_table = [&](const std::string& root_path, const std::string& db_name, + const std::string& table_name) -> std::string { + ::ArrowSchema c_schema; + EXPECT_TRUE(arrow::ExportSchema(*upstream_schema, &c_schema).ok()); + auto catalog_result = Catalog::Create(root_path, {{Options::FILE_SYSTEM, "local"}}); + EXPECT_TRUE(catalog_result.ok()); + auto catalog = std::move(catalog_result).value(); + EXPECT_OK(catalog->CreateDatabase(db_name, {}, /*ignore_if_exists=*/true)); + EXPECT_OK(catalog->CreateTable(Identifier(db_name, table_name), &c_schema, + /*partition_keys=*/{}, /*primary_keys=*/{}, upstream_options, + /*ignore_if_exists=*/false)); + std::string table_path = PathUtil::JoinPath(root_path, db_name + ".db/" + table_name); + + std::string raw_json = R"([ +[0, 0, "test_0"], +[1, 10, "test_1"], +[2, 20, "test_2"], +[3, 30, "test_3"] +])"; + auto raw_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(upstream_fields), raw_json) + .ValueOrDie()); + auto desc_array_result = ConvertRawBlobToDescriptor(raw_array, {"blob"}); + EXPECT_TRUE(desc_array_result.ok()); + auto desc_array = std::move(desc_array_result).value(); + + auto commit_msgs_result = + WriteArray(table_path, {}, upstream_schema->field_names(), {desc_array}); + EXPECT_TRUE(commit_msgs_result.ok()); + EXPECT_OK(Commit(table_path, std::move(commit_msgs_result).value())); + return table_path; + }; + + // main branch with no data + ::ArrowSchema c_schema; + ASSERT_TRUE(arrow::ExportSchema(*upstream_schema, &c_schema).ok()); + ASSERT_OK_AND_ASSIGN(auto catalog, + Catalog::Create(dir_->Str(), {{Options::FILE_SYSTEM, "local"}})); + ASSERT_OK(catalog->CreateDatabase(upstream_db_name, {}, /*ignore_if_exists=*/true)); + ASSERT_OK(catalog->CreateTable(Identifier(upstream_db_name, upstream_table_name), &c_schema, + /*partition_keys=*/{}, /*primary_keys=*/{}, upstream_options, + /*ignore_if_exists=*/false)); + std::string upstream_table_path = + PathUtil::JoinPath(dir_->Str(), upstream_db_name + ".db/" + upstream_table_name); + + // test branch with real data in a separate directory, then copy to the main table. The main + // branch stays empty, so any data read back must come from the test branch. + std::string branch_name = "test"; + auto branch_src_dir = UniqueTestDirectory::Create("local"); + std::string branch_src_table_path = + build_upstream_table(branch_src_dir->Str(), upstream_db_name, upstream_table_name); + std::string branch_dir = + PathUtil::JoinPath(upstream_table_path, "branch/branch-" + branch_name); + for (const auto& entry : std::filesystem::directory_iterator(branch_src_table_path)) { + std::string name = entry.path().filename().string(); + if (name == "snapshot" || name == "schema") { + ASSERT_TRUE(TestUtil::CopyDirectory(entry.path().string(), + PathUtil::JoinPath(branch_dir, name))); + } else if (entry.is_directory()) { + // Shared data dirs + ASSERT_TRUE(TestUtil::CopyDirectory(entry.path().string(), + PathUtil::JoinPath(upstream_table_path, name))); + } + } + // build downstream table + arrow::FieldVector fields = {arrow::field("f0", arrow::int32()), + BlobUtils::ToArrowField("view", true)}; + std::map options = { + {Options::MANIFEST_FORMAT, "orc"}, + {Options::FILE_FORMAT, file_format}, + {Options::BUCKET, "-1"}, + {Options::ROW_TRACKING_ENABLED, "true"}, + {Options::DATA_EVOLUTION_ENABLED, "true"}, + {Options::BLOB_VIEW_FIELD, "view"}, + {Options::BLOB_VIEW_UPSTREAM_WAREHOUSE, dir_->Str()}, + {Options::FILE_SYSTEM, "local"}}; + CreateTable(fields, /*partition_keys=*/{}, options); + std::string table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); + + Identifier branch_identifier(upstream_db_name, upstream_table_name + "$branch_" + branch_name); + arrow::LargeBinaryBuilder view_builder; + for (int64_t row = 0; row < 4; ++row) { + BlobViewStruct view_struct(branch_identifier, 2, row); + auto serialized = view_struct.Serialize(GetDefaultPool()); + ASSERT_TRUE( + view_builder + .Append(reinterpret_cast(serialized->data()), serialized->size()) + .ok()); + } + std::shared_ptr write_view_array; + ASSERT_TRUE(view_builder.Finish(&write_view_array).ok()); + auto write_f0_array = + arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), R"([100,101,102,103])") + .ValueOrDie(); + auto write_struct = std::dynamic_pointer_cast( + arrow::StructArray::Make(arrow::ArrayVector({write_f0_array, write_view_array}), + std::vector({"f0", "view"})) + .ValueOrDie()); + + auto schema = arrow::schema(fields); + ASSERT_OK_AND_ASSIGN(auto commit_msgs, + WriteArray(table_path, {}, schema->field_names(), {write_struct})); + ASSERT_OK(Commit(table_path, commit_msgs)); + + // scan/read the downstream table and verify resolved blobs come from the *test* branch. + ASSERT_OK_AND_ASSIGN(auto plan, ScanTable(table_path)); + ASSERT_OK_AND_ASSIGN(auto result, + ReadTable(table_path, schema->field_names(), plan, /*predicate=*/nullptr)); + ASSERT_TRUE(result.chunked_array); + auto read_concat = arrow::Concatenate(result.chunked_array->chunks()).ValueOrDie(); + auto read_struct = std::dynamic_pointer_cast(read_concat); + ASSERT_EQ(read_struct->length(), 4); + ASSERT_OK_AND_ASSIGN(auto result_array, ConvertDescriptorToRawBlob(read_struct, {"view"})); + + std::string expected_json = R"([ +[100, "test_0"], +[101, "test_1"], +[102, "test_2"], +[103, "test_3"] +])"; + auto expected_struct = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), expected_json) + .ValueOrDie()); + ASSERT_OK_AND_ASSIGN(auto expected_with_rk, PrependRowKindColumn(expected_struct)); + ASSERT_TRUE(result_array->Equals(expected_with_rk)) + << "result_array:" << result_array->ToString() << std::endl + << "expected:" << expected_with_rk->ToString(); +} + +TEST_P(BlobTableInteTest, TestBlobViewFailsWhenBothPathsAbsent) { + auto file_format = GetParam(); + + auto upstream_dir = UniqueTestDirectory::Create("local"); + const std::string upstream_db_name = "nonexistent_db"; + const std::string upstream_table_name = "nonexistent_table"; + + // Build downstream table that references the non-existent upstream table. + arrow::FieldVector fields = {arrow::field("f0", arrow::int32()), + BlobUtils::ToArrowField("view", true)}; + std::map options = { + {Options::MANIFEST_FORMAT, "orc"}, + {Options::FILE_FORMAT, file_format}, + {Options::BUCKET, "-1"}, + {Options::ROW_TRACKING_ENABLED, "true"}, + {Options::DATA_EVOLUTION_ENABLED, "true"}, + {Options::BLOB_VIEW_FIELD, "view"}, + {Options::BLOB_VIEW_UPSTREAM_WAREHOUSE, upstream_dir->Str()}, + {Options::FILE_SYSTEM, "local"}}; + CreateTable(fields, /*partition_keys=*/{}, options); + std::string table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); + + // Write a single row with a BlobViewStruct pointing to the non-existent upstream table. + Identifier upstream_identifier(upstream_db_name, upstream_table_name); + BlobViewStruct view_struct(upstream_identifier, /*field_id=*/2, /*row_id=*/0); + auto serialized = view_struct.Serialize(GetDefaultPool()); + arrow::LargeBinaryBuilder view_builder; + ASSERT_TRUE( + view_builder + .Append(reinterpret_cast(serialized->data()), serialized->size()) + .ok()); + std::shared_ptr write_view_array; + ASSERT_TRUE(view_builder.Finish(&write_view_array).ok()); + auto write_f0_array = + arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), R"([100])").ValueOrDie(); + auto write_struct = std::dynamic_pointer_cast( + arrow::StructArray::Make(arrow::ArrayVector({write_f0_array, write_view_array}), + std::vector({"f0", "view"})) + .ValueOrDie()); + + auto schema = arrow::schema(fields); + ASSERT_OK_AND_ASSIGN(auto commit_msgs, + WriteArray(table_path, {}, schema->field_names(), {write_struct})); + ASSERT_OK(Commit(table_path, commit_msgs)); + + // Reading should fail because both paths are absent. + ASSERT_OK_AND_ASSIGN(auto plan, ScanTable(table_path)); + ASSERT_NOK_WITH_MSG(ReadTable(table_path, schema->field_names(), plan, /*predicate=*/nullptr), + "Ambiguous table path"); +} + +TEST_P(BlobTableInteTest, TestBlobViewWithFallbackPath) { + auto file_format = GetParam(); + + const std::string upstream_db_name = "fallback_db"; + const std::string upstream_table_name = "fallback_table"; + arrow::FieldVector upstream_fields = {arrow::field("f0", arrow::int32()), + BlobUtils::ToArrowField("blob", true)}; + auto upstream_schema = arrow::schema(upstream_fields); + std::map upstream_options = { + {Options::MANIFEST_FORMAT, "orc"}, + {Options::FILE_FORMAT, file_format}, + {Options::BUCKET, "-1"}, + {Options::ROW_TRACKING_ENABLED, "true"}, + {Options::DATA_EVOLUTION_ENABLED, "true"}, + {Options::BLOB_AS_DESCRIPTOR, "true"}, + {Options::FILE_SYSTEM, "local"}}; + + // Create the upstream table at the fallback path: /db/table (no .db). + auto upstream_dir = UniqueTestDirectory::Create("local"); + std::string fallback_table_path = + PathUtil::JoinPath(upstream_dir->Str(), upstream_db_name + "/" + upstream_table_name); + + // Manually create schema at fallback path so it can be read as a valid paimon table. + { + // Use a temporary warehouse with Catalog to build the table data, then copy to fallback. + auto temp_dir = UniqueTestDirectory::Create("local"); + ::ArrowSchema c_schema; + ASSERT_TRUE(arrow::ExportSchema(*upstream_schema, &c_schema).ok()); + ASSERT_OK_AND_ASSIGN(auto catalog, + Catalog::Create(temp_dir->Str(), {{Options::FILE_SYSTEM, "local"}})); + ASSERT_OK(catalog->CreateDatabase(upstream_db_name, {}, /*ignore_if_exists=*/true)); + ASSERT_OK(catalog->CreateTable(Identifier(upstream_db_name, upstream_table_name), &c_schema, + /*partition_keys=*/{}, /*primary_keys=*/{}, upstream_options, + /*ignore_if_exists=*/false)); + std::string temp_table_path = + PathUtil::JoinPath(temp_dir->Str(), upstream_db_name + ".db/" + upstream_table_name); + + // Write data to the temp table. + std::string raw_json = R"([[0, "hello"], [1, "world"]])"; + auto raw_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(upstream_fields), raw_json) + .ValueOrDie()); + ASSERT_OK_AND_ASSIGN(auto desc_array, ConvertRawBlobToDescriptor(raw_array, {"blob"})); + ASSERT_OK_AND_ASSIGN( + auto upstream_commit_msgs, + WriteArray(temp_table_path, {}, upstream_schema->field_names(), {desc_array})); + ASSERT_OK(Commit(temp_table_path, upstream_commit_msgs)); + + // Copy the temp table to the fallback path (without .db). + ASSERT_TRUE(TestUtil::CopyDirectory(temp_table_path, fallback_table_path)); + } + + // Build the downstream table. + arrow::FieldVector fields = {arrow::field("f0", arrow::int32()), + BlobUtils::ToArrowField("view", true)}; + std::map options = { + {Options::MANIFEST_FORMAT, "orc"}, + {Options::FILE_FORMAT, file_format}, + {Options::BUCKET, "-1"}, + {Options::ROW_TRACKING_ENABLED, "true"}, + {Options::DATA_EVOLUTION_ENABLED, "true"}, + {Options::BLOB_VIEW_FIELD, "view"}, + {Options::BLOB_VIEW_UPSTREAM_WAREHOUSE, upstream_dir->Str()}, + {Options::FILE_SYSTEM, "local"}}; + CreateTable(fields, /*partition_keys=*/{}, options); + std::string table_path = PathUtil::JoinPath(dir_->Str(), "foo.db/bar"); + + // Write downstream rows referencing the upstream fallback table. + Identifier upstream_identifier(upstream_db_name, upstream_table_name); + arrow::LargeBinaryBuilder view_builder; + for (int64_t row = 0; row < 2; ++row) { + BlobViewStruct view_struct(upstream_identifier, /*field_id=*/1, /*row_id=*/row); + auto serialized = view_struct.Serialize(GetDefaultPool()); + ASSERT_TRUE( + view_builder + .Append(reinterpret_cast(serialized->data()), serialized->size()) + .ok()); + } + std::shared_ptr write_view_array; + ASSERT_TRUE(view_builder.Finish(&write_view_array).ok()); + auto write_f0_array = + arrow::ipc::internal::json::ArrayFromJSON(arrow::int32(), R"([100, 101])").ValueOrDie(); + auto write_struct = std::dynamic_pointer_cast( + arrow::StructArray::Make(arrow::ArrayVector({write_f0_array, write_view_array}), + std::vector({"f0", "view"})) + .ValueOrDie()); + + auto schema = arrow::schema(fields); + ASSERT_OK_AND_ASSIGN(auto commit_msgs, + WriteArray(table_path, {}, schema->field_names(), {write_struct})); + ASSERT_OK(Commit(table_path, commit_msgs)); + + // Read and verify + ASSERT_OK_AND_ASSIGN(auto plan, ScanTable(table_path)); + ASSERT_OK_AND_ASSIGN(auto result, + ReadTable(table_path, schema->field_names(), plan, /*predicate=*/nullptr)); + ASSERT_TRUE(result.chunked_array); + auto read_concat = arrow::Concatenate(result.chunked_array->chunks()).ValueOrDie(); + auto read_struct = std::dynamic_pointer_cast(read_concat); + ASSERT_EQ(read_struct->length(), 2); + ASSERT_OK_AND_ASSIGN(auto result_array, ConvertDescriptorToRawBlob(read_struct, {"view"})); + + std::string expected_json = R"([[100, "hello"], [101, "world"]])"; + auto expected_struct = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), expected_json) + .ValueOrDie()); + ASSERT_OK_AND_ASSIGN(auto expected_with_rk, PrependRowKindColumn(expected_struct)); + ASSERT_TRUE(result_array->Equals(expected_with_rk)) + << "result_array:" << result_array->ToString() << std::endl + << "expected:" << expected_with_rk->ToString(); +} + } // namespace paimon::test diff --git a/test/inte/read_inte_test.cpp b/test/inte/read_inte_test.cpp index 5211fdd8c..ad4ea83ec 100644 --- a/test/inte/read_inte_test.cpp +++ b/test/inte/read_inte_test.cpp @@ -562,7 +562,7 @@ TEST(SystemTableReadInteTest, TestReadOptionsSystemTable) { /*ignore_if_exists=*/false)); ArrowSchemaRelease(&schema); - std::string system_table_path = catalog->GetTableLocation(Identifier("db1", "tbl1$options")); + std::string system_table_path = PathUtil::JoinPath(dir->Str(), "warehouse/db1.db/tbl1$options"); ScanContextBuilder scan_context_builder(system_table_path); scan_context_builder.SetOptions(options); ASSERT_OK_AND_ASSIGN(auto scan_context, scan_context_builder.Finish()); diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/README b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/README index 26a014578..ec2374225 100644 --- a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/README +++ b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/README @@ -42,8 +42,7 @@ renameColumn("f3", "f0") renameColumn("f5", "f3") updateColumnPosition(SchemaChange.Move.first("f4")) addColumn("f6", INT()) -renameColumn("blob", "f5") -updateColumnPosition(SchemaChange.Move.after("f5", "f2")) +updateColumnPosition(SchemaChange.Move.after("blob", "f2")) f4:TIMESTAMP(9):6 key0:INT:0 @@ -51,7 +50,7 @@ key1:INT:1 f3:INT:2 f1:STRING:3 f2:DECIMAL(6, 3):4 -f5:BLOB NOT NULL:8 +blob:BLOB NOT NULL:8 f0:BOOLEAN:5 f6:INT:9 @@ -66,7 +65,7 @@ set first row id to 5 commit snapshot-2 NoCompact -write "f4", "key0", "key1", "f2", "f0", "f6", "f5" +write "f4", "key0", "key1", "f2", "f0", "f6", "blob" Add(Timestamp(1732603136054l, 154), 0, 1, "55.002", true, 56, "Apple") Add(Timestamp(1732603136064l, 164), 0, 1, "666.012", false, 66, "Banana") Add(Timestamp(1732603136074l, 174), 0, 1, "-77.022", true, 76, "Cherry") @@ -78,7 +77,7 @@ commit snapshot-3 NoCompact Recall with schema-1, with _ROW_ID and _SEQUENCE_NUMBER, result: -[f4, key0, key1, f3, f1, f2, f5, f0, f6, _ROW_ID, _SEQUENCE_NUMBER] +[f4, key0, key1, f3, f1, f2, blob, f0, f6, _ROW_ID, _SEQUENCE_NUMBER] [TIMESTAMP(9), INT, INT, INT, STRING, DECIMAL(6, 3), BLOB NOT NULL, BOOLEAN, INT, BIGINT, BIGINT NOT NULL] INSERT: 1970-01-05T00:00, 0, 1, 100, 2024-11-26 06:38:56.001000001, 0.020, Lily, true, NULL, 0, 1 INSERT: 1969-11-18T00:00, 0, 1, 110, 2024-11-26 06:38:56.011000011, 11.120, Alice, true, NULL, 1, 1 diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-17cd7b66-4ca1-42b3-a569-d7b0b889d314-0.orc b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-2c772a51-21ba-4c17-b464-6f1a314d0950-0.orc similarity index 100% rename from test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-17cd7b66-4ca1-42b3-a569-d7b0b889d314-0.orc rename to test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-2c772a51-21ba-4c17-b464-6f1a314d0950-0.orc diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-330a942b-a3dd-408e-9042-293c03c6cba5-0.orc b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-a5e684ea-c245-4719-858d-eb6d7e3a8446-0.orc similarity index 100% rename from test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-330a942b-a3dd-408e-9042-293c03c6cba5-0.orc rename to test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-a5e684ea-c245-4719-858d-eb6d7e3a8446-0.orc diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-330a942b-a3dd-408e-9042-293c03c6cba5-1.blob b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-a5e684ea-c245-4719-858d-eb6d7e3a8446-1.blob similarity index 100% rename from test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-330a942b-a3dd-408e-9042-293c03c6cba5-1.blob rename to test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-a5e684ea-c245-4719-858d-eb6d7e3a8446-1.blob diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-330a942b-a3dd-408e-9042-293c03c6cba5-2.blob b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-a5e684ea-c245-4719-858d-eb6d7e3a8446-2.blob similarity index 100% rename from test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-330a942b-a3dd-408e-9042-293c03c6cba5-2.blob rename to test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-a5e684ea-c245-4719-858d-eb6d7e3a8446-2.blob diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-2b2e00bc-77fd-428d-87d1-8bea1063991c-0.orc b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-ac7f689b-b9ec-4e8a-b0ae-4844ca3aa499-0.orc similarity index 100% rename from test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-2b2e00bc-77fd-428d-87d1-8bea1063991c-0.orc rename to test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-ac7f689b-b9ec-4e8a-b0ae-4844ca3aa499-0.orc diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-2b2e00bc-77fd-428d-87d1-8bea1063991c-1.blob b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-ac7f689b-b9ec-4e8a-b0ae-4844ca3aa499-1.blob similarity index 100% rename from test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-2b2e00bc-77fd-428d-87d1-8bea1063991c-1.blob rename to test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-ac7f689b-b9ec-4e8a-b0ae-4844ca3aa499-1.blob diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-2b2e00bc-77fd-428d-87d1-8bea1063991c-2.blob b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-ac7f689b-b9ec-4e8a-b0ae-4844ca3aa499-2.blob similarity index 100% rename from test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-2b2e00bc-77fd-428d-87d1-8bea1063991c-2.blob rename to test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-ac7f689b-b9ec-4e8a-b0ae-4844ca3aa499-2.blob diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-35fe228b-1e40-44dd-9832-ff078fc60150-0.orc b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-d0ddcf17-3ea2-4c90-8995-3e0ee4d3b5a9-0.orc similarity index 100% rename from test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-35fe228b-1e40-44dd-9832-ff078fc60150-0.orc rename to test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-d0ddcf17-3ea2-4c90-8995-3e0ee4d3b5a9-0.orc diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-6ee90f75-0567-4692-b375-db3f5abd2b96-0 b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-6ee90f75-0567-4692-b375-db3f5abd2b96-0 deleted file mode 100644 index ede0fd392..000000000 Binary files a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-6ee90f75-0567-4692-b375-db3f5abd2b96-0 and /dev/null differ diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-70d9ec54-00dc-47b6-85d8-944c25110315-0 b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-70d9ec54-00dc-47b6-85d8-944c25110315-0 new file mode 100644 index 000000000..d597e2334 Binary files /dev/null and b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-70d9ec54-00dc-47b6-85d8-944c25110315-0 differ diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-c1e6e601-fc3b-4ab1-8b53-dd661f91a59c-0 b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-c1e6e601-fc3b-4ab1-8b53-dd661f91a59c-0 new file mode 100644 index 000000000..54d935e4f Binary files /dev/null and b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-c1e6e601-fc3b-4ab1-8b53-dd661f91a59c-0 differ diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-fbe3fd00-750f-446d-81c3-bff220caed22-0 b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-fbe3fd00-750f-446d-81c3-bff220caed22-0 new file mode 100644 index 000000000..57e6edf4b Binary files /dev/null and b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-fbe3fd00-750f-446d-81c3-bff220caed22-0 differ diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-3d1147dc-1860-48ab-a450-fce85ac5e9da-0 b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-3d1147dc-1860-48ab-a450-fce85ac5e9da-0 new file mode 100644 index 000000000..7964d271b Binary files /dev/null and b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-3d1147dc-1860-48ab-a450-fce85ac5e9da-0 differ diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-3d1147dc-1860-48ab-a450-fce85ac5e9da-1 b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-3d1147dc-1860-48ab-a450-fce85ac5e9da-1 new file mode 100644 index 000000000..6e191db20 Binary files /dev/null and b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-3d1147dc-1860-48ab-a450-fce85ac5e9da-1 differ diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-7cf0d548-c508-4619-823c-10b0a842e6c2-0 b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-7cf0d548-c508-4619-823c-10b0a842e6c2-0 new file mode 100644 index 000000000..04ebc0214 Binary files /dev/null and b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-7cf0d548-c508-4619-823c-10b0a842e6c2-0 differ diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-7cf0d548-c508-4619-823c-10b0a842e6c2-1 b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-7cf0d548-c508-4619-823c-10b0a842e6c2-1 new file mode 100644 index 000000000..7964d271b Binary files /dev/null and b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-7cf0d548-c508-4619-823c-10b0a842e6c2-1 differ diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-8046c4af-62fd-466e-8199-29d90455f97d-0 b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-8046c4af-62fd-466e-8199-29d90455f97d-0 new file mode 100644 index 000000000..2ed0c9c32 Binary files /dev/null and b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-8046c4af-62fd-466e-8199-29d90455f97d-0 differ diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-8046c4af-62fd-466e-8199-29d90455f97d-1 b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-8046c4af-62fd-466e-8199-29d90455f97d-1 new file mode 100644 index 000000000..0091f8e77 Binary files /dev/null and b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-8046c4af-62fd-466e-8199-29d90455f97d-1 differ diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-b54f1ecf-7c13-4dba-a1c5-19f988924166-2 b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-b54f1ecf-7c13-4dba-a1c5-19f988924166-2 deleted file mode 100644 index 120279720..000000000 Binary files a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-b54f1ecf-7c13-4dba-a1c5-19f988924166-2 and /dev/null differ diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-b54f1ecf-7c13-4dba-a1c5-19f988924166-3 b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-b54f1ecf-7c13-4dba-a1c5-19f988924166-3 deleted file mode 100644 index 6e740fbd9..000000000 Binary files a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-b54f1ecf-7c13-4dba-a1c5-19f988924166-3 and /dev/null differ diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/schema/schema-0 b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/schema/schema-0 index 6fdd4b854..5dae45442 100644 --- a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/schema/schema-0 +++ b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/schema/schema-0 @@ -42,11 +42,12 @@ "partitionKeys" : [ "key0", "key1" ], "primaryKeys" : [ ], "options" : { - "data-evolution.enabled" : "true", - "manifest.format" : "avro", + "bucket" : "-1", "blob.target-file-size" : "50", - "file.format" : "orc", - "row-tracking.enabled" : "true" + "row-tracking.enabled" : "true", + "data-evolution.enabled" : "true", + "manifest.format" : "orc", + "file.format" : "orc" }, - "timeMillis" : 1762240062329 + "timeMillis" : 1780565151810 } \ No newline at end of file diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/schema/schema-1 b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/schema/schema-1 index 9677faa59..194fb6266 100644 --- a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/schema/schema-1 +++ b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/schema/schema-1 @@ -27,7 +27,7 @@ "type" : "DECIMAL(6, 3)" }, { "id" : 8, - "name" : "f5", + "name" : "blob", "type" : "BLOB NOT NULL" }, { "id" : 5, @@ -42,11 +42,12 @@ "partitionKeys" : [ "key0", "key1" ], "primaryKeys" : [ ], "options" : { - "data-evolution.enabled" : "true", - "manifest.format" : "avro", + "bucket" : "-1", "blob.target-file-size" : "50", - "file.format" : "orc", - "row-tracking.enabled" : "true" + "row-tracking.enabled" : "true", + "data-evolution.enabled" : "true", + "manifest.format" : "orc", + "file.format" : "orc" }, - "timeMillis" : 1762240063751 + "timeMillis" : 1780565153189 } \ No newline at end of file diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/snapshot/snapshot-1 b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/snapshot/snapshot-1 index dc479f2e2..ed3c4054c 100644 --- a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/snapshot/snapshot-1 +++ b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/snapshot/snapshot-1 @@ -2,18 +2,15 @@ "version" : 3, "id" : 1, "schemaId" : 0, - "baseManifestList" : "manifest-list-8c48d688-c706-4a24-b5bd-f930c1043d9e-0", - "baseManifestListSize" : 884, - "deltaManifestList" : "manifest-list-8c48d688-c706-4a24-b5bd-f930c1043d9e-1", - "deltaManifestListSize" : 995, - "changelogManifestList" : null, - "commitUser" : "1f85e3d0-2588-469c-9cfc-b15b05478937", - "commitIdentifier" : 1, + "baseManifestList" : "manifest-list-7cf0d548-c508-4619-823c-10b0a842e6c2-0", + "baseManifestListSize" : 392, + "deltaManifestList" : "manifest-list-7cf0d548-c508-4619-823c-10b0a842e6c2-1", + "deltaManifestListSize" : 1543, + "commitUser" : "7476fefe-afad-457c-b3e8-3e56cc1a4856", + "commitIdentifier" : 0, "commitKind" : "APPEND", - "timeMillis" : 1762240063499, - "logOffsets" : { }, + "timeMillis" : 1780565153104, "totalRecordCount" : 15, "deltaRecordCount" : 15, - "changelogRecordCount" : 0, "nextRowId" : 0 } \ No newline at end of file diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/snapshot/snapshot-2 b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/snapshot/snapshot-2 index 62842ba1f..a34213f67 100644 --- a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/snapshot/snapshot-2 +++ b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/snapshot/snapshot-2 @@ -2,18 +2,15 @@ "version" : 3, "id" : 2, "schemaId" : 1, - "baseManifestList" : "manifest-list-b54f1ecf-7c13-4dba-a1c5-19f988924166-0", - "baseManifestListSize" : 995, - "deltaManifestList" : "manifest-list-b54f1ecf-7c13-4dba-a1c5-19f988924166-1", - "deltaManifestListSize" : 996, - "changelogManifestList" : null, - "commitUser" : "c994cddc-02cc-478b-8b1f-37fe48574b31", - "commitIdentifier" : 2, + "baseManifestList" : "manifest-list-3d1147dc-1860-48ab-a450-fce85ac5e9da-0", + "baseManifestListSize" : 1543, + "deltaManifestList" : "manifest-list-3d1147dc-1860-48ab-a450-fce85ac5e9da-1", + "deltaManifestListSize" : 1565, + "commitUser" : "be4ac30f-72d8-4764-a282-55b1d39e1027", + "commitIdentifier" : 1, "commitKind" : "APPEND", - "timeMillis" : 1762240063850, - "logOffsets" : { }, + "timeMillis" : 1780565153415, "totalRecordCount" : 20, "deltaRecordCount" : 5, - "changelogRecordCount" : 0, "nextRowId" : 0 } \ No newline at end of file diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/snapshot/snapshot-3 b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/snapshot/snapshot-3 index 54b1bdab5..0a262914f 100644 --- a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/snapshot/snapshot-3 +++ b/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/snapshot/snapshot-3 @@ -2,18 +2,15 @@ "version" : 3, "id" : 3, "schemaId" : 1, - "baseManifestList" : "manifest-list-b54f1ecf-7c13-4dba-a1c5-19f988924166-2", - "baseManifestListSize" : 1034, - "deltaManifestList" : "manifest-list-b54f1ecf-7c13-4dba-a1c5-19f988924166-3", - "deltaManifestListSize" : 996, - "changelogManifestList" : null, - "commitUser" : "c994cddc-02cc-478b-8b1f-37fe48574b31", - "commitIdentifier" : 3, + "baseManifestList" : "manifest-list-8046c4af-62fd-466e-8199-29d90455f97d-0", + "baseManifestListSize" : 1723, + "deltaManifestList" : "manifest-list-8046c4af-62fd-466e-8199-29d90455f97d-1", + "deltaManifestListSize" : 1569, + "commitUser" : "40830793-1dbe-4e84-abfc-1d2e6889880c", + "commitIdentifier" : 2, "commitKind" : "APPEND", - "timeMillis" : 1762240063892, - "logOffsets" : { }, + "timeMillis" : 1780565153586, "totalRecordCount" : 30, "deltaRecordCount" : 10, - "changelogRecordCount" : 0, "nextRowId" : 0 } \ No newline at end of file diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/README b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/README index 26a014578..ec2374225 100644 --- a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/README +++ b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/README @@ -42,8 +42,7 @@ renameColumn("f3", "f0") renameColumn("f5", "f3") updateColumnPosition(SchemaChange.Move.first("f4")) addColumn("f6", INT()) -renameColumn("blob", "f5") -updateColumnPosition(SchemaChange.Move.after("f5", "f2")) +updateColumnPosition(SchemaChange.Move.after("blob", "f2")) f4:TIMESTAMP(9):6 key0:INT:0 @@ -51,7 +50,7 @@ key1:INT:1 f3:INT:2 f1:STRING:3 f2:DECIMAL(6, 3):4 -f5:BLOB NOT NULL:8 +blob:BLOB NOT NULL:8 f0:BOOLEAN:5 f6:INT:9 @@ -66,7 +65,7 @@ set first row id to 5 commit snapshot-2 NoCompact -write "f4", "key0", "key1", "f2", "f0", "f6", "f5" +write "f4", "key0", "key1", "f2", "f0", "f6", "blob" Add(Timestamp(1732603136054l, 154), 0, 1, "55.002", true, 56, "Apple") Add(Timestamp(1732603136064l, 164), 0, 1, "666.012", false, 66, "Banana") Add(Timestamp(1732603136074l, 174), 0, 1, "-77.022", true, 76, "Cherry") @@ -78,7 +77,7 @@ commit snapshot-3 NoCompact Recall with schema-1, with _ROW_ID and _SEQUENCE_NUMBER, result: -[f4, key0, key1, f3, f1, f2, f5, f0, f6, _ROW_ID, _SEQUENCE_NUMBER] +[f4, key0, key1, f3, f1, f2, blob, f0, f6, _ROW_ID, _SEQUENCE_NUMBER] [TIMESTAMP(9), INT, INT, INT, STRING, DECIMAL(6, 3), BLOB NOT NULL, BOOLEAN, INT, BIGINT, BIGINT NOT NULL] INSERT: 1970-01-05T00:00, 0, 1, 100, 2024-11-26 06:38:56.001000001, 0.020, Lily, true, NULL, 0, 1 INSERT: 1969-11-18T00:00, 0, 1, 110, 2024-11-26 06:38:56.011000011, 11.120, Alice, true, NULL, 1, 1 diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-50989127-b7b2-4950-aaca-f77216b0a46b-0.parquet b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-50989127-b7b2-4950-aaca-f77216b0a46b-0.parquet new file mode 100644 index 000000000..5819be524 Binary files /dev/null and b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-50989127-b7b2-4950-aaca-f77216b0a46b-0.parquet differ diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-cea41455-da2f-4e8b-a026-f001aace3d58-1.blob b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-50989127-b7b2-4950-aaca-f77216b0a46b-1.blob similarity index 100% rename from test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-cea41455-da2f-4e8b-a026-f001aace3d58-1.blob rename to test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-50989127-b7b2-4950-aaca-f77216b0a46b-1.blob diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-cea41455-da2f-4e8b-a026-f001aace3d58-2.blob b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-50989127-b7b2-4950-aaca-f77216b0a46b-2.blob similarity index 100% rename from test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-cea41455-da2f-4e8b-a026-f001aace3d58-2.blob rename to test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-50989127-b7b2-4950-aaca-f77216b0a46b-2.blob diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-5bb7b1e9-7186-463f-82f5-8d1b004386f2-0.parquet b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-5bb7b1e9-7186-463f-82f5-8d1b004386f2-0.parquet new file mode 100644 index 000000000..307553da4 Binary files /dev/null and b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-5bb7b1e9-7186-463f-82f5-8d1b004386f2-0.parquet differ diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-aaf02df8-9f32-4c97-9b43-de9ef946a1ed-0.parquet b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-aaf02df8-9f32-4c97-9b43-de9ef946a1ed-0.parquet deleted file mode 100644 index 9fdb60735..000000000 Binary files a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-aaf02df8-9f32-4c97-9b43-de9ef946a1ed-0.parquet and /dev/null differ diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-ac3de758-1ff4-45c3-94f6-27a4e7c47172-0.parquet b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-ac3de758-1ff4-45c3-94f6-27a4e7c47172-0.parquet new file mode 100644 index 000000000..35224b8ef Binary files /dev/null and b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-ac3de758-1ff4-45c3-94f6-27a4e7c47172-0.parquet differ diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-c8aab979-5e51-4112-92f2-ff3036b332c6-1.blob b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-ac3de758-1ff4-45c3-94f6-27a4e7c47172-1.blob similarity index 100% rename from test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-c8aab979-5e51-4112-92f2-ff3036b332c6-1.blob rename to test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-ac3de758-1ff4-45c3-94f6-27a4e7c47172-1.blob diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-c8aab979-5e51-4112-92f2-ff3036b332c6-2.blob b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-ac3de758-1ff4-45c3-94f6-27a4e7c47172-2.blob similarity index 100% rename from test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-c8aab979-5e51-4112-92f2-ff3036b332c6-2.blob rename to test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-ac3de758-1ff4-45c3-94f6-27a4e7c47172-2.blob diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-c8aab979-5e51-4112-92f2-ff3036b332c6-0.parquet b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-c8aab979-5e51-4112-92f2-ff3036b332c6-0.parquet deleted file mode 100644 index e855b6c3f..000000000 Binary files a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-c8aab979-5e51-4112-92f2-ff3036b332c6-0.parquet and /dev/null differ diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-cea41455-da2f-4e8b-a026-f001aace3d58-0.parquet b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-cea41455-da2f-4e8b-a026-f001aace3d58-0.parquet deleted file mode 100644 index 1611b36b4..000000000 Binary files a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-cea41455-da2f-4e8b-a026-f001aace3d58-0.parquet and /dev/null differ diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-f9d44206-06f9-4411-b69f-749f7868ac2d-0.parquet b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-f9d44206-06f9-4411-b69f-749f7868ac2d-0.parquet deleted file mode 100644 index 3a4d5e51e..000000000 Binary files a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-f9d44206-06f9-4411-b69f-749f7868ac2d-0.parquet and /dev/null differ diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-fcdc65ab-9916-4a9e-a13a-5e5298933a18-0.parquet b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-fcdc65ab-9916-4a9e-a13a-5e5298933a18-0.parquet new file mode 100644 index 000000000..04132a79e Binary files /dev/null and b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/key0=0/key1=1/bucket-0/data-fcdc65ab-9916-4a9e-a13a-5e5298933a18-0.parquet differ diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-f7c12f92-163b-45c2-9160-76375875e57b-0 b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-04dfe945-c569-4a24-b830-c69d4f426e64-0 similarity index 88% rename from test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-f7c12f92-163b-45c2-9160-76375875e57b-0 rename to test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-04dfe945-c569-4a24-b830-c69d4f426e64-0 index 8f8263d5f..22e9496dc 100644 Binary files a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-f7c12f92-163b-45c2-9160-76375875e57b-0 and b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-04dfe945-c569-4a24-b830-c69d4f426e64-0 differ diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-1cee2cdf-03d4-48ab-8683-047e8b613dd1-1 b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-26f82168-b0c0-43fc-86af-1a15e18be25b-0 similarity index 84% rename from test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-1cee2cdf-03d4-48ab-8683-047e8b613dd1-1 rename to test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-26f82168-b0c0-43fc-86af-1a15e18be25b-0 index 2407d6d03..b7c6ea666 100644 Binary files a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-1cee2cdf-03d4-48ab-8683-047e8b613dd1-1 and b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-26f82168-b0c0-43fc-86af-1a15e18be25b-0 differ diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-3d16fcf1-d81b-4d69-b118-f581c55e2278-0 b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-3d16fcf1-d81b-4d69-b118-f581c55e2278-0 deleted file mode 100644 index f65b6ddd1..000000000 Binary files a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-3d16fcf1-d81b-4d69-b118-f581c55e2278-0 and /dev/null differ diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-1cee2cdf-03d4-48ab-8683-047e8b613dd1-0 b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-90a8ba7d-9c17-4a24-a2e6-0b67534a6961-0 similarity index 86% rename from test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-1cee2cdf-03d4-48ab-8683-047e8b613dd1-0 rename to test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-90a8ba7d-9c17-4a24-a2e6-0b67534a6961-0 index c34df04b6..7a696089d 100644 Binary files a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-1cee2cdf-03d4-48ab-8683-047e8b613dd1-0 and b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-90a8ba7d-9c17-4a24-a2e6-0b67534a6961-0 differ diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-f7c12f92-163b-45c2-9160-76375875e57b-1 b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-f7c12f92-163b-45c2-9160-76375875e57b-1 deleted file mode 100644 index 480e49b90..000000000 Binary files a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-f7c12f92-163b-45c2-9160-76375875e57b-1 and /dev/null differ diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-084c1aa1-da4f-4232-8e28-9d935ec4cea6-0 b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-084c1aa1-da4f-4232-8e28-9d935ec4cea6-0 deleted file mode 100644 index 1179a0013..000000000 Binary files a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-084c1aa1-da4f-4232-8e28-9d935ec4cea6-0 and /dev/null differ diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-084c1aa1-da4f-4232-8e28-9d935ec4cea6-1 b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-084c1aa1-da4f-4232-8e28-9d935ec4cea6-1 deleted file mode 100644 index 48b19be21..000000000 Binary files a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-084c1aa1-da4f-4232-8e28-9d935ec4cea6-1 and /dev/null differ diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-084c1aa1-da4f-4232-8e28-9d935ec4cea6-2 b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-084c1aa1-da4f-4232-8e28-9d935ec4cea6-2 deleted file mode 100644 index d26360e86..000000000 Binary files a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-084c1aa1-da4f-4232-8e28-9d935ec4cea6-2 and /dev/null differ diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-084c1aa1-da4f-4232-8e28-9d935ec4cea6-3 b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-084c1aa1-da4f-4232-8e28-9d935ec4cea6-3 deleted file mode 100644 index 33779160d..000000000 Binary files a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-084c1aa1-da4f-4232-8e28-9d935ec4cea6-3 and /dev/null differ diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-b54f1ecf-7c13-4dba-a1c5-19f988924166-1 b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-6d497f15-c843-4136-b18c-25c34fa32b31-0 similarity index 68% rename from test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-b54f1ecf-7c13-4dba-a1c5-19f988924166-1 rename to test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-6d497f15-c843-4136-b18c-25c34fa32b31-0 index 51b3950cb..1404cbe4c 100644 Binary files a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-b54f1ecf-7c13-4dba-a1c5-19f988924166-1 and b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-6d497f15-c843-4136-b18c-25c34fa32b31-0 differ diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-8c48d688-c706-4a24-b5bd-f930c1043d9e-0 b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-6d497f15-c843-4136-b18c-25c34fa32b31-1 similarity index 68% rename from test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-8c48d688-c706-4a24-b5bd-f930c1043d9e-0 rename to test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-6d497f15-c843-4136-b18c-25c34fa32b31-1 index cc1b1ddd8..ee129c728 100644 Binary files a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-8c48d688-c706-4a24-b5bd-f930c1043d9e-0 and b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-6d497f15-c843-4136-b18c-25c34fa32b31-1 differ diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-8c48d688-c706-4a24-b5bd-f930c1043d9e-1 b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-ba29e330-ee07-4bd0-b5ee-02f21c647791-0 similarity index 76% rename from test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-8c48d688-c706-4a24-b5bd-f930c1043d9e-1 rename to test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-ba29e330-ee07-4bd0-b5ee-02f21c647791-0 index cacd0b15c..1b555e964 100644 Binary files a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-8c48d688-c706-4a24-b5bd-f930c1043d9e-1 and b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-ba29e330-ee07-4bd0-b5ee-02f21c647791-0 differ diff --git a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-b54f1ecf-7c13-4dba-a1c5-19f988924166-0 b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-ba29e330-ee07-4bd0-b5ee-02f21c647791-1 similarity index 68% rename from test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-b54f1ecf-7c13-4dba-a1c5-19f988924166-0 rename to test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-ba29e330-ee07-4bd0-b5ee-02f21c647791-1 index 14070357c..41dd0a5aa 100644 Binary files a/test/test_data/orc/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-b54f1ecf-7c13-4dba-a1c5-19f988924166-0 and b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-ba29e330-ee07-4bd0-b5ee-02f21c647791-1 differ diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-c802eb01-61a7-4f42-98fe-4f674b3c4966-0 b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-c802eb01-61a7-4f42-98fe-4f674b3c4966-0 new file mode 100644 index 000000000..e03501c36 Binary files /dev/null and b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-c802eb01-61a7-4f42-98fe-4f674b3c4966-0 differ diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-c802eb01-61a7-4f42-98fe-4f674b3c4966-1 b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-c802eb01-61a7-4f42-98fe-4f674b3c4966-1 new file mode 100644 index 000000000..97c65ed93 Binary files /dev/null and b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-c802eb01-61a7-4f42-98fe-4f674b3c4966-1 differ diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-f96bc893-0979-4689-a3d0-cd9cd2f1a203-0 b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-f96bc893-0979-4689-a3d0-cd9cd2f1a203-0 deleted file mode 100644 index 3102f50a1..000000000 Binary files a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-f96bc893-0979-4689-a3d0-cd9cd2f1a203-0 and /dev/null differ diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-f96bc893-0979-4689-a3d0-cd9cd2f1a203-1 b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-f96bc893-0979-4689-a3d0-cd9cd2f1a203-1 deleted file mode 100644 index f720ef96d..000000000 Binary files a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/manifest/manifest-list-f96bc893-0979-4689-a3d0-cd9cd2f1a203-1 and /dev/null differ diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/schema/schema-0 b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/schema/schema-0 index 5f36c97da..42062ec26 100644 --- a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/schema/schema-0 +++ b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/schema/schema-0 @@ -42,11 +42,12 @@ "partitionKeys" : [ "key0", "key1" ], "primaryKeys" : [ ], "options" : { + "bucket" : "-1", + "blob.target-file-size" : "50", + "row-tracking.enabled" : "true", "data-evolution.enabled" : "true", "manifest.format" : "avro", - "blob.target-file-size" : "50", - "file.format" : "parquet", - "row-tracking.enabled" : "true" + "file.format" : "parquet" }, - "timeMillis" : 1762240078799 + "timeMillis" : 1780568705752 } \ No newline at end of file diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/schema/schema-1 b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/schema/schema-1 index e2f4b6555..35f77e5c8 100644 --- a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/schema/schema-1 +++ b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/schema/schema-1 @@ -27,7 +27,7 @@ "type" : "DECIMAL(6, 3)" }, { "id" : 8, - "name" : "f5", + "name" : "blob", "type" : "BLOB NOT NULL" }, { "id" : 5, @@ -42,11 +42,12 @@ "partitionKeys" : [ "key0", "key1" ], "primaryKeys" : [ ], "options" : { + "bucket" : "-1", + "blob.target-file-size" : "50", + "row-tracking.enabled" : "true", "data-evolution.enabled" : "true", "manifest.format" : "avro", - "blob.target-file-size" : "50", - "file.format" : "parquet", - "row-tracking.enabled" : "true" + "file.format" : "parquet" }, - "timeMillis" : 1762240081035 + "timeMillis" : 1780568707875 } \ No newline at end of file diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/snapshot/snapshot-1 b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/snapshot/snapshot-1 index 02a8cd25b..4a38b0058 100644 --- a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/snapshot/snapshot-1 +++ b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/snapshot/snapshot-1 @@ -2,18 +2,15 @@ "version" : 3, "id" : 1, "schemaId" : 0, - "baseManifestList" : "manifest-list-f96bc893-0979-4689-a3d0-cd9cd2f1a203-0", - "baseManifestListSize" : 884, - "deltaManifestList" : "manifest-list-f96bc893-0979-4689-a3d0-cd9cd2f1a203-1", - "deltaManifestListSize" : 995, - "changelogManifestList" : null, - "commitUser" : "ec4202d2-8ff3-4c75-9e6a-23281dad6562", - "commitIdentifier" : 1, + "baseManifestList" : "manifest-list-ba29e330-ee07-4bd0-b5ee-02f21c647791-0", + "baseManifestListSize" : 1006, + "deltaManifestList" : "manifest-list-ba29e330-ee07-4bd0-b5ee-02f21c647791-1", + "deltaManifestListSize" : 1119, + "commitUser" : "7aec5357-eb8d-42b4-bb9e-1753ec6e10ae", + "commitIdentifier" : 0, "commitKind" : "APPEND", - "timeMillis" : 1762240080710, - "logOffsets" : { }, + "timeMillis" : 1780568707810, "totalRecordCount" : 15, "deltaRecordCount" : 15, - "changelogRecordCount" : 0, "nextRowId" : 0 } \ No newline at end of file diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/snapshot/snapshot-2 b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/snapshot/snapshot-2 index 76fdce1b2..3ea1d0141 100644 --- a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/snapshot/snapshot-2 +++ b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/snapshot/snapshot-2 @@ -2,18 +2,15 @@ "version" : 3, "id" : 2, "schemaId" : 1, - "baseManifestList" : "manifest-list-084c1aa1-da4f-4232-8e28-9d935ec4cea6-0", - "baseManifestListSize" : 995, - "deltaManifestList" : "manifest-list-084c1aa1-da4f-4232-8e28-9d935ec4cea6-1", - "deltaManifestListSize" : 996, - "changelogManifestList" : null, - "commitUser" : "9a49e1a1-9141-41a4-bfd1-403f0679d72c", - "commitIdentifier" : 2, + "baseManifestList" : "manifest-list-6d497f15-c843-4136-b18c-25c34fa32b31-0", + "baseManifestListSize" : 1119, + "deltaManifestList" : "manifest-list-6d497f15-c843-4136-b18c-25c34fa32b31-1", + "deltaManifestListSize" : 1121, + "commitUser" : "732dc057-d23c-456e-96a7-b888244afe95", + "commitIdentifier" : 1, "commitKind" : "APPEND", - "timeMillis" : 1762240081198, - "logOffsets" : { }, + "timeMillis" : 1780568707986, "totalRecordCount" : 20, "deltaRecordCount" : 5, - "changelogRecordCount" : 0, "nextRowId" : 0 } \ No newline at end of file diff --git a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/snapshot/snapshot-3 b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/snapshot/snapshot-3 index cf9eb13bb..3c9727246 100644 --- a/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/snapshot/snapshot-3 +++ b/test/test_data/parquet/blob_append_table_alter_table_with_cast_with_data_evolution.db/blob_append_table_alter_table_with_cast_with_data_evolution/snapshot/snapshot-3 @@ -2,18 +2,15 @@ "version" : 3, "id" : 3, "schemaId" : 1, - "baseManifestList" : "manifest-list-084c1aa1-da4f-4232-8e28-9d935ec4cea6-2", - "baseManifestListSize" : 1032, - "deltaManifestList" : "manifest-list-084c1aa1-da4f-4232-8e28-9d935ec4cea6-3", - "deltaManifestListSize" : 997, - "changelogManifestList" : null, - "commitUser" : "9a49e1a1-9141-41a4-bfd1-403f0679d72c", - "commitIdentifier" : 3, + "baseManifestList" : "manifest-list-c802eb01-61a7-4f42-98fe-4f674b3c4966-0", + "baseManifestListSize" : 1155, + "deltaManifestList" : "manifest-list-c802eb01-61a7-4f42-98fe-4f674b3c4966-1", + "deltaManifestListSize" : 1124, + "commitUser" : "6ae1e966-c41a-4699-b01f-0caf0b76c924", + "commitIdentifier" : 2, "commitKind" : "APPEND", - "timeMillis" : 1762240081236, - "logOffsets" : { }, + "timeMillis" : 1780568708049, "totalRecordCount" : 30, "deltaRecordCount" : 10, - "changelogRecordCount" : 0, "nextRowId" : 0 } \ No newline at end of file