From fb7e31917c2e646f8f4158e2292258363da79388 Mon Sep 17 00:00:00 2001 From: lxy264173 Date: Wed, 10 Jun 2026 19:04:32 +0800 Subject: [PATCH 1/4] test: add blob compatible test --- test/inte/blob_table_inte_test.cpp | 104 ++++++++++++++++-- .../blob_desc_field_with_external_path/README | 34 ++++++ ...aeee2df5-4d8d-46e1-823b-437e1a2bc30e-1.orc | Bin 0 -> 763 bytes ...eee2df5-4d8d-46e1-823b-437e1a2bc30e-2.blob | Bin 0 -> 77 bytes ...eee2df5-4d8d-46e1-823b-437e1a2bc30e-3.blob | Bin 0 -> 54 bytes ...eee2df5-4d8d-46e1-823b-437e1a2bc30e-0.blob | Bin 0 -> 29 bytes ...est-3978fbf9-7623-4e3e-b6ee-0d55d07377fa-0 | Bin 0 -> 2932 bytes ...ist-85cd267b-28d7-4aab-93b8-cf7e8ac57c07-0 | Bin 0 -> 392 bytes ...ist-85cd267b-28d7-4aab-93b8-cf7e8ac57c07-1 | Bin 0 -> 1510 bytes .../raw_blob/b0-row-0.bin | 1 + .../raw_blob/b0-row-1.bin | 1 + .../raw_blob/b0-row-2.bin | 1 + .../schema/schema-0 | 41 +++++++ .../snapshot/EARLIEST | 1 + .../snapshot/LATEST | 1 + .../snapshot/snapshot-1 | 16 +++ .../blob_desc_field_with_external_path/README | 34 ++++++ ...888c-c975-46af-9a7d-36ca13c32455-1.parquet | Bin 0 -> 2379 bytes ...749888c-c975-46af-9a7d-36ca13c32455-2.blob | Bin 0 -> 77 bytes ...749888c-c975-46af-9a7d-36ca13c32455-3.blob | Bin 0 -> 54 bytes ...749888c-c975-46af-9a7d-36ca13c32455-0.blob | Bin 0 -> 29 bytes ...est-de59f444-0069-4836-8dcd-8a3a81158e02-0 | Bin 0 -> 2150 bytes ...ist-7395f790-699b-4a38-8747-10f23ceba1d6-0 | Bin 0 -> 1006 bytes ...ist-7395f790-699b-4a38-8747-10f23ceba1d6-1 | Bin 0 -> 1115 bytes .../raw_blob/b0-row-0.bin | 1 + .../raw_blob/b0-row-1.bin | 1 + .../raw_blob/b0-row-2.bin | 1 + .../schema/schema-0 | 41 +++++++ .../snapshot/EARLIEST | 1 + .../snapshot/LATEST | 1 + .../snapshot/snapshot-1 | 16 +++ 31 files changed, 289 insertions(+), 7 deletions(-) create mode 100644 test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/README create mode 100644 test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/bucket-0/data-aeee2df5-4d8d-46e1-823b-437e1a2bc30e-1.orc create mode 100644 test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/bucket-0/data-aeee2df5-4d8d-46e1-823b-437e1a2bc30e-2.blob create mode 100644 test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/bucket-0/data-aeee2df5-4d8d-46e1-823b-437e1a2bc30e-3.blob create mode 100644 test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/external_blob/data-aeee2df5-4d8d-46e1-823b-437e1a2bc30e-0.blob create mode 100644 test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/manifest/manifest-3978fbf9-7623-4e3e-b6ee-0d55d07377fa-0 create mode 100644 test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/manifest/manifest-list-85cd267b-28d7-4aab-93b8-cf7e8ac57c07-0 create mode 100644 test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/manifest/manifest-list-85cd267b-28d7-4aab-93b8-cf7e8ac57c07-1 create mode 100644 test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/raw_blob/b0-row-0.bin create mode 100644 test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/raw_blob/b0-row-1.bin create mode 100644 test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/raw_blob/b0-row-2.bin create mode 100644 test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/schema/schema-0 create mode 100644 test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/snapshot/EARLIEST create mode 100644 test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/snapshot/LATEST create mode 100644 test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/snapshot/snapshot-1 create mode 100644 test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/README create mode 100644 test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/bucket-0/data-e749888c-c975-46af-9a7d-36ca13c32455-1.parquet create mode 100644 test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/bucket-0/data-e749888c-c975-46af-9a7d-36ca13c32455-2.blob create mode 100644 test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/bucket-0/data-e749888c-c975-46af-9a7d-36ca13c32455-3.blob create mode 100644 test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/external_blob/data-e749888c-c975-46af-9a7d-36ca13c32455-0.blob create mode 100644 test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/manifest/manifest-de59f444-0069-4836-8dcd-8a3a81158e02-0 create mode 100644 test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/manifest/manifest-list-7395f790-699b-4a38-8747-10f23ceba1d6-0 create mode 100644 test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/manifest/manifest-list-7395f790-699b-4a38-8747-10f23ceba1d6-1 create mode 100644 test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/raw_blob/b0-row-0.bin create mode 100644 test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/raw_blob/b0-row-1.bin create mode 100644 test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/raw_blob/b0-row-2.bin create mode 100644 test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/schema/schema-0 create mode 100644 test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/snapshot/EARLIEST create mode 100644 test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/snapshot/LATEST create mode 100644 test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/snapshot/snapshot-1 diff --git a/test/inte/blob_table_inte_test.cpp b/test/inte/blob_table_inte_test.cpp index ca78d2f91..c9a290b89 100644 --- a/test/inte/blob_table_inte_test.cpp +++ b/test/inte/blob_table_inte_test.cpp @@ -16,11 +16,11 @@ #include #include -#include -#include +#include #include #include #include +#include #include #include #include @@ -40,6 +40,7 @@ #include "paimon/common/data/binary_array_writer.h" #include "paimon/common/data/binary_row.h" #include "paimon/common/data/binary_row_writer.h" +#include "paimon/common/data/blob_descriptor.h" #include "paimon/common/data/blob_view_struct.h" #include "paimon/common/factories/io_hook.h" #include "paimon/common/table/special_fields.h" @@ -345,19 +346,68 @@ class BlobTableInteTest : public testing::Test, public ::testing::WithParamInter }); } + struct BlobDescriptorPathRewrite { + std::string table_path; + std::vector table_relative_blob_dirs; + }; + + static std::optional TryRewriteDescriptorUri( + const std::string& descriptor_uri, const BlobDescriptorPathRewrite& rewrite, + const std::shared_ptr& fs) { + if (rewrite.table_path.empty()) { + return std::nullopt; + } + + for (const auto& blob_dir : rewrite.table_relative_blob_dirs) { + const std::string marker = "/" + blob_dir + "/"; + auto marker_pos = descriptor_uri.find(marker); + if (marker_pos != std::string::npos) { + std::string relative_blob_path = descriptor_uri.substr(marker_pos + 1); + return PathUtil::JoinPath(rewrite.table_path, relative_blob_path); + } + } + + std::string file_name = PathUtil::GetName(descriptor_uri); + for (const auto& blob_dir : rewrite.table_relative_blob_dirs) { + std::string candidate = + PathUtil::JoinPath(PathUtil::JoinPath(rewrite.table_path, blob_dir), file_name); + auto exists = fs->Exists(candidate); + if (exists.ok() && exists.value()) { + return candidate; + } + } + return std::nullopt; + } + /// Convert a StructArray with serialized BlobDescriptor bytes back to a StructArray /// with raw blob bytes. Only blob fields are resolved; other columns (including /// _VALUE_KIND) are kept as-is. Result> ConvertDescriptorToRawBlob( const std::shared_ptr& desc_array, - const std::set& blob_fields) const { + const std::set& blob_fields, + const BlobDescriptorPathRewrite& rewrite = {}) const { auto fs = std::make_shared(); return TransformBlobFields( desc_array, blob_fields, [&](const std::string_view& descriptor_bytes, arrow::LargeBinaryBuilder* builder) -> Status { - PAIMON_ASSIGN_OR_RAISE(auto blob, Blob::FromDescriptor(descriptor_bytes.data(), - descriptor_bytes.size())); + PAIMON_ASSIGN_OR_RAISE( + auto descriptor, + BlobDescriptor::Deserialize(descriptor_bytes.data(), descriptor_bytes.size())); + std::string descriptor_uri = descriptor->Uri(); + auto rewritten_uri = TryRewriteDescriptorUri(descriptor_uri, rewrite, fs); + if (rewritten_uri.has_value()) { + descriptor_uri = rewritten_uri.value(); + } + + PAIMON_ASSIGN_OR_RAISE( + auto rewritten_descriptor, + BlobDescriptor::Create(descriptor->Version(), descriptor_uri, + descriptor->Offset(), descriptor->Length())); + auto rewritten_descriptor_bytes = rewritten_descriptor->Serialize(pool_); + PAIMON_ASSIGN_OR_RAISE(auto blob, + Blob::FromDescriptor(rewritten_descriptor_bytes->data(), + rewritten_descriptor_bytes->size())); PAIMON_ASSIGN_OR_RAISE(auto data, blob->ToData(fs, pool_)); PAIMON_RETURN_NOT_OK_FROM_ARROW(builder->Append(data->data(), data->size())); return Status::OK(); @@ -2081,8 +2131,12 @@ TEST_P(BlobTableInteTest, TestBlobDescriptorFieldPartialExternalStorageNoAsDescr auto read_concat = arrow::Concatenate(result.chunked_array->chunks()).ValueOrDie(); auto read_struct = std::dynamic_pointer_cast(read_concat); - // After read, b0 and b1 are both descriptor-stored; resolve all back to raw bytes - ASSERT_OK_AND_ASSIGN(auto resolved, ConvertDescriptorToRawBlob(read_struct, {"b0", "b1"})); + // After read, b0 and b1 are both descriptor-stored; resolve all back to raw bytes. + // Java-generated descriptors may contain absolute paths from the generation machine. + // Rewrite them to the portable blob directories inside the copied table path. + BlobDescriptorPathRewrite rewrite{table_path, {"raw_blob", "external_blob"}}; + ASSERT_OK_AND_ASSIGN(auto resolved, + ConvertDescriptorToRawBlob(read_struct, {"b0", "b1"}, rewrite)); ASSERT_OK_AND_ASSIGN(auto expected_with_rk, PrependRowKindColumn(raw_array)); ASSERT_TRUE(resolved->Equals(expected_with_rk)); @@ -3008,4 +3062,40 @@ TEST_P(BlobTableInteTest, TestBlobViewWithFallbackPath) { << "expected:" << expected_with_rk->ToString(); } +TEST_P(BlobTableInteTest, TestReadBlobDescriptorFieldFromJava) { + std::string table_path = + GetDataDir() + "/" + file_format + + "/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path"; + arrow::FieldVector fields = { + arrow::field("f0", arrow::int32()), BlobUtils::ToArrowField("b0", true), + BlobUtils::ToArrowField("b1", true), BlobUtils::ToArrowField("b2", true), + BlobUtils::ToArrowField("b3", true)}; + auto schema = arrow::schema(fields); + // b0: all non-null, b1: has nulls, b2: all non-null, b3: has nulls + std::string raw_json = R"([ + [1, "img_0", null, "raw_2_0", "raw_3_0"], + [2, "img_1", "vid_1", "raw_2_1", null ], + [3, "img_2", null, "raw_2_2", "raw_3_2" ] + ])"; + auto raw_array = std::dynamic_pointer_cast( + arrow::ipc::internal::json::ArrayFromJSON(arrow::struct_(fields), raw_json).ValueOrDie()); + + ASSERT_OK_AND_ASSIGN(auto plan, ScanTable(table_path)); + std::map read_options = {{Options::BLOB_AS_DESCRIPTOR, "false"}}; + ASSERT_OK_AND_ASSIGN(auto result, ReadTable(table_path, schema->field_names(), plan, + /*predicate=*/nullptr, read_options)); + ASSERT_TRUE(result.chunked_array); + auto read_concat = arrow::Concatenate(result.chunked_array->chunks()).ValueOrDie(); + auto read_struct = std::dynamic_pointer_cast(read_concat); + + // After read, b0 and b1 are both descriptor-stored; resolve all back to raw bytes. + // Java-generated descriptors may contain absolute paths from the generation machine. + // Rewrite them to the portable blob directories inside the copied table path. + BlobDescriptorPathRewrite rewrite{table_path, {"raw_blob", "external_blob"}}; + ASSERT_OK_AND_ASSIGN(auto resolved, + ConvertDescriptorToRawBlob(read_struct, {"b0", "b1"}, rewrite)); + ASSERT_OK_AND_ASSIGN(auto expected_with_rk, PrependRowKindColumn(raw_array)); + ASSERT_TRUE(resolved->Equals(expected_with_rk)); +} + } // namespace paimon::test diff --git a/test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/README b/test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/README new file mode 100644 index 000000000..dd0ea5e3a --- /dev/null +++ b/test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/README @@ -0,0 +1,34 @@ +f0:int b0:blob b1:blob b2:blob b3:blob (all can be null) +bucket count: -1 +target-file-size: 700 +row-tracking.enabled: true +data-evolution.enabled: true +blob-descriptor-field: b0,b1 +blob-external-storage-field: b1 +blob-external-storage-path: /external_blob (absolute path at generation time) + +b0: descriptor field, inline in main file, source .bin files in raw_blob/ +b1: descriptor field, repacked to external storage in external_blob/ +b2: regular blob, written to .blob files +b3: regular blob, written to .blob files + +Note: b0 is passed as descriptor via Blob.fromLocal(); b1/b2/b3 are raw bytes. +Paimon auto-converts b1 to descriptor internally. + +Msgs: +snapshot-1 +write field: "f0", "b0", "b1", "b2", "b3" +Add: 1, "img_0", null, "raw_2_0", "raw_3_0" +Add: 2, "img_1", "vid_1", "raw_2_1", null +Add: 3, "img_2", null, "raw_2_2", "raw_3_2" +NoCompact + +Expected data files after commit: + [0] main orc file (f0, b0, b1), 3 rows + [1] .blob file (b2), 3 rows + [2] .blob file (b3), 3 rows + +C++ read note: + Descriptor URIs contain absolute paths from the Java generation machine. + ConvertDescriptorToRawBlob uses BlobDescriptorPathRewrite{"raw_blob", "external_blob"} + to redirect them to
/raw_blob/ and
/external_blob/ at read time. diff --git a/test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/bucket-0/data-aeee2df5-4d8d-46e1-823b-437e1a2bc30e-1.orc b/test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/bucket-0/data-aeee2df5-4d8d-46e1-823b-437e1a2bc30e-1.orc new file mode 100644 index 0000000000000000000000000000000000000000..179e5412d33ed52918613839e35eebf297f4413b GIT binary patch literal 763 zcmeYdau#G@;9?VE;b0D6&;~MvxtJLk7=(B@n1$Flm;~4)cmfzSf#O13tUz&~3?~P( z6Envj_5cP$ps*MhCs-IN#R!yI#vH&16m(-!Nw8vM(AcW~H{l<{To#5R4>{Ehrm{ST z4S4jf`p7A(>QA&i=4O6aTay|0^Y6SniL*w@g9_WyGJ>Xd3|=rA*V z?vVI&na@X)(S*~r*g=PP3G&h^Ao1TJ3gS|Q z%c`slm%LPzCzxCQ|6lLYd^uOeGp~D|itnK{)xVQPUgTxGSM@%2ch3J-p7Qc?^#aD% z+58-rIj-DT?4ccC)P3%d!d#Cw@i}K44lUn(^@aB1Ur{N$cGML3Z^+y=Y3+hD`=182 zezmunzAAl}%K9)*qyMfO-LlVp-MV6Z&;4J)h3%JECo*|<9{vze zp|+UJ_ZpL@Fq-r~yJ%y$e)$2=J;uyX+u1vKg-mNEi)u-J~+&8Pznzooe>3?>{L%mR~G zq_}}psKj$7g7iMPvAZG~xqao_M literal 0 HcmV?d00001 diff --git a/test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/bucket-0/data-aeee2df5-4d8d-46e1-823b-437e1a2bc30e-2.blob b/test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/bucket-0/data-aeee2df5-4d8d-46e1-823b-437e1a2bc30e-2.blob new file mode 100644 index 0000000000000000000000000000000000000000..9cbdf5db3f6cbff7fe742fd48217db6041dc1b0b GIT binary patch literal 77 zcmX>v=oe9xSRQW_Zy?S90YPesb?4#2hA?44BdJV;un|o73g7eqJq8A5pk77*&lwOg literal 0 HcmV?d00001 diff --git a/test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/bucket-0/data-aeee2df5-4d8d-46e1-823b-437e1a2bc30e-3.blob b/test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/bucket-0/data-aeee2df5-4d8d-46e1-823b-437e1a2bc30e-3.blob new file mode 100644 index 0000000000000000000000000000000000000000..8e8a3cdb65e6a85b6405bce3eb21ef879fa8667d GIT binary patch literal 54 rcmX>v=oe9xSRQX2Zy?S90h4~d-+vx1Yy=Y)xhOeNPv3wUsE82&v6BsK literal 0 HcmV?d00001 diff --git a/test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/external_blob/data-aeee2df5-4d8d-46e1-823b-437e1a2bc30e-0.blob b/test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/external_blob/data-aeee2df5-4d8d-46e1-823b-437e1a2bc30e-0.blob new file mode 100644 index 0000000000000000000000000000000000000000..fa69a509e514bf6257b924fc9169a2b79945f90a GIT binary patch literal 29 ecmX>v=oe9znG$a($^Zd7gKy7d)X`=J@)!YgJqAkv literal 0 HcmV?d00001 diff --git a/test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/manifest/manifest-3978fbf9-7623-4e3e-b6ee-0d55d07377fa-0 b/test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/manifest/manifest-3978fbf9-7623-4e3e-b6ee-0d55d07377fa-0 new file mode 100644 index 0000000000000000000000000000000000000000..5a29f6c000e1382894076007c9b955193e69f4c5 GIT binary patch literal 2932 zcmb_cdpJ~iAOFoc<2c4lGlrPa#V`t^VVH3%i`+#_5nZrma*2pg=z>L-ZW2jKVJRZ@ zy6z^qy-QOoq$jJST;H}-lBi9!$BcKMo;US8`#kSI-*e9Q`uTh>$7Q{h8~_A_ibr8S z(8gZn2`~VFNIj2Fg|xE}A3)#~D_*e^(#7yDl<-@C@k7yI%9Jc^ zhBOamgo4nhUt>5w@6UM5?=Zhs4+Etr0Y#o2f=~vHM=9|rEgofJCbGnYqJRp`_Wavb zqUVK-X}?kBi@5-7>iBtj0#P6u@(=WLTQX06@%DTJk_twgBq5HVKnNE2m75o{ON8QHYQG zY%pKqlg#$-014`^rvCuLEu8dO-K6^I9ULkAzcm!TS9M0Y6|3l z;Mq#ZSHb)r4PsurIZCES0Ic^1+MIsS}CwIl=$ygpJA)YQ%QJ76KNt zxy9WR3vrhv0!vBLCOG7X3Cq{Zx68MSabV)p-)^6Luw)jv+3z`}IQiX$0Qhr+9HE|2 zU&zIb5abJq|8G7=AR8HQyq%@WNp+G&54PfIIIoMww645$htrN*+nUt`TorDQDh$!` zOG_RC`2(7iZKhNtTl3-4JomM8qAXx71y!`L!&&^3vdhxAU#5C#u8OXJm-( zyI0uu|6A_ZMB@9QU1#jS8x1jtKAkMcl4?$PQ?HHppcckFSnlyG;>w=ZjXs2DCpWC? zy%4RnW-)HbQVqnkHqxpEthhq?K=<4!1Jysjyt|`mByf5$F`#&LVN+Ia`?wnOnHv==%i|T zuOf4Ja_oZPqd&V$qcXzm5^g@S&Lob8uij}P^3hRTq_f0x?=YDO_Hki%DZ_F{B*>gX z@m5h>geUjH=3F8URqR3X9{ZM`6-C9sAl92SFj9P14s1Tv5XNs2Od-)b!+6BJKRjA@ zWwVl4PQasy66;J(+EHPG+(aZRScE)O2Fc|G)(n?2I3H!8j7E}(L9$y$vqsUy>B+b= z(zoGlnbARH=A~sJ0}iG-w*wsy-gnfU zW(`ip?4_|Xc)ly+!#Wugj3$uswzj=p-?DZx=;(@@QwRNH+BAm$klSCdY)i${?A=Zo z0li)9{RJZz!_z+gS;2Bz>v7w=xs_C`puAB^$s~W^!nT7lq&Rs}o@JBCd+9B3toNIk zsPv94Exqe6b!z0S=-t3(MjjmXX{$OtR1#h@QeWU9{~_|wsER>S`KcWyFZ0!$ z{kmsHmTT16MDRsZ^6cL9(sSNj&x$M9FHaf%B5m}(NK}!=xczF=s@+|z>3GY4|D4PC zQFHuFv(@O6E^d7Vvu?NSwV$>=N1tAX^AIj%zMxh6fHE=F>W+`0nf>=AZd8Fa!&+VX z_qNtYO@qrrlso$~hqTDCW8ofk?t8lrmsUN!e7HfaUmKHQO5H{bQvJ#k;2OKyGM@OcTi=d1HP5 zIYYlOIJoRQedS)muI=BS&v;eVb=Ui71U{*+f3kU(irWiYh2)TDuU+nI74_>EO>1@R z58fWSau?k*kI`hAWQ!{46c27J#LedU7%ZGec-Pf?$^_*j7U4XZ>@Xx z@sLXM{*)gC*w_i7!-sSgR( zEJ+bere3?V+_PA2HBQc+Y0ab-73wTLR-ngvO?@J-X9-kI)f#!KM=TxQ*&W60v^yAnL#mw7*D;9Z+s=gU$Tr))EKCO&#h zb!f-FhslIb2b{SZf3qANv8XyVXx|svUeZl5nQ>f3G> kMkS{!J?Ix$*?6%eCzWmm-3s+mN=C}AF ze4M$vx??@>6Tpf<9+hLEpe8AL$jYM$I$4tk5Dp=Rh(?TPlS}BQDs-JVvGrISKb3pO z+%{qJX+B@>9do;1Im_ATk5liu_2=&HyF1kj0vhF$W-*e{11NU6OIo0J{<_U${N=B- z{c!a(oH5DD4JakUD3|n#AR5CZ#Rw$X5Sly$*(5W;6rkt>Ia%TW1U={rNkgG8H0Z$r zNkVrGdeA8iUf6|EE}4BEn$asZ?>eDGfv0XgrR%c{h>KjE^n3tKBT3n)QY-w(hmXv0xYi?#_tZR~LoT{5-mYS+-kYZ|@ zVqk7;Zl0E?Yak>M7r?;8AQ8%_yMWaYXq_0uI)0#aoE$H71zzY%ywJts5;g%g2{tU^ z3<3-i3;_&Co=^vxDhPBT2nca-Fgh`DNCYro7e#eJ07DWdM+1jI1BXNdhceI@eu(2Z zI2eSOK(=6n57hQSBMyx(b_NDF28VbC1`=ZjsLziA=7s2@Xv- z0hT&|RS=?{ND}Bs9Y%1{N+@7RWnoyPfA|+(TjF3+a=Di%Eb!Lsf|^gKH;##>9qzAmLCz=VPNrp*RHm_b?aDF6$F?a zt!p-jv3T1VM?Nv$Y0C6!ZThxZCkUM^to~e8-9j@jOe`Z!(JAbHm7` zRkqN5&0=ZYzc*C1R35aI7~e1EublM6G0N*`=F24xyBVIE8qeKnb8M?q(;QZgvy2Dq zI!l^k0~aP8y%O$izvfWM)}x2mxEYL_o7y>K?i~28ee8A5xid`P)B9RiFP@R(a?!(; z@ewm5SZ_1g@-wu}O0W*_T*GW29DDejjuLl+VpgDJ)`pBd{he&LO5(SDK3((JS1M^^ z`N3SrZM&YU7PiKTC{ASg5N}l0e8fvKfFX#%pW$xWrZ--jx~H!H-yb5vxmZqAi6D@(m~ z&qDH?%k2$+4;*Z}JoA7D&jzX3dPeoRB|R%%xamLq#oyQWRe4=lSgPy&1=~BPIreW| za%!9R?35WPOb1-&e@)fi_I|ti9x+GAk?J zEK*HNp/external_blob (absolute path at generation time) + +b0: descriptor field, inline in main file, source .bin files in raw_blob/ +b1: descriptor field, repacked to external storage in external_blob/ +b2: regular blob, written to .blob files +b3: regular blob, written to .blob files + +Note: b0 is passed as descriptor via Blob.fromLocal(); b1/b2/b3 are raw bytes. +Paimon auto-converts b1 to descriptor internally. + +Msgs: +snapshot-1 +write field: "f0", "b0", "b1", "b2", "b3" +Add: 1, "img_0", null, "raw_2_0", "raw_3_0" +Add: 2, "img_1", "vid_1", "raw_2_1", null +Add: 3, "img_2", null, "raw_2_2", "raw_3_2" +NoCompact + +Expected data files after commit: + [0] main orc file (f0, b0, b1), 3 rows + [1] .blob file (b2), 3 rows + [2] .blob file (b3), 3 rows + +C++ read note: + Descriptor URIs contain absolute paths from the Java generation machine. + ConvertDescriptorToRawBlob uses BlobDescriptorPathRewrite{"raw_blob", "external_blob"} + to redirect them to
/raw_blob/ and
/external_blob/ at read time. diff --git a/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/bucket-0/data-e749888c-c975-46af-9a7d-36ca13c32455-1.parquet b/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/bucket-0/data-e749888c-c975-46af-9a7d-36ca13c32455-1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..42bd60e03504b93a325a19ee7db1800ee40f49fa GIT binary patch literal 2379 zcmWG=3^EjD5LFSi5j{8U zP}Kqs(MwFCuMf0;=f%+WhJ_)?Q&DZfa?Ah!|DO-_nEQXJ3a5t`-;sZQnZoXi#J10G zxcySiiSvJ&*`(m^O9eii^sw+Rl2fLU>e=v#uKRIC|8-@>BSQ(UhG}I?7 zfBFA*EEBpVGWE_P-=Yx5piL&arfrE*DL0p_)swWJ77%IFo=|4=Dk&qk$?4sWc}@kp zH@<#(IbiYAdv@oV?p$P!j@d3@Kke45O^?gU?!*Zg%k>87a=HZSn8fKdi+-{{eCR=f zV?cvJ!Jm8!zW@Eq3;|PBmKiNO)g=e?l%yb&qy!5nu7DIXkP>B*6oU#oFgXXix;Xj! zIQ0T0^fU5vQ}uHyDviuc49$)83lcMP^Yfrod`W6?i9VP}Ni0d!Ps+(picgW0pwx&| ztj^g+xpT-f0_+?X21yw)4$&-NFo9S_qAkpz5EJ{u4&r`e7F`A5FJlG;rjG|NB&6bt zlQU9t6Ghp8qN+?1Olbz8z|iIaF_R2JnP5!AP!i$_#MYyrEd9s|fM9++845VJ0E zh_x`Q71@|EXslwSG^$gJlZ!G7O7e^1(=tYpRfQ(|GTw;1^ zd|rNhVsU&5T)lo;e!gBxl73QRk$zEPc|0iP=qDNI7Uh@g8tElv<^e+)41nbV7)T6K z#~NUBEK+=c(g%asJRVUwv2*OA^O(@$#1I_6Zn&`i%jK_jU&GbcsC#K0)o*uu~t+1T7D#mLgkz&O>= o!qD8@FflnbDJ?ZQ+1xNGHOv=oe9xSRQW_Zy?S90YPesb?4#2hA?44BdJV;un|o73g7eqJq8A5pk77*&lwOg literal 0 HcmV?d00001 diff --git a/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/bucket-0/data-e749888c-c975-46af-9a7d-36ca13c32455-3.blob b/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/bucket-0/data-e749888c-c975-46af-9a7d-36ca13c32455-3.blob new file mode 100644 index 0000000000000000000000000000000000000000..8e8a3cdb65e6a85b6405bce3eb21ef879fa8667d GIT binary patch literal 54 rcmX>v=oe9xSRQX2Zy?S90h4~d-+vx1Yy=Y)xhOeNPv3wUsE82&v6BsK literal 0 HcmV?d00001 diff --git a/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/external_blob/data-e749888c-c975-46af-9a7d-36ca13c32455-0.blob b/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/external_blob/data-e749888c-c975-46af-9a7d-36ca13c32455-0.blob new file mode 100644 index 0000000000000000000000000000000000000000..fa69a509e514bf6257b924fc9169a2b79945f90a GIT binary patch literal 29 ecmX>v=oe9znG$a($^Zd7gKy7d)X`=J@)!YgJqAkv literal 0 HcmV?d00001 diff --git a/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/manifest/manifest-de59f444-0069-4836-8dcd-8a3a81158e02-0 b/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/manifest/manifest-de59f444-0069-4836-8dcd-8a3a81158e02-0 new file mode 100644 index 0000000000000000000000000000000000000000..e8c82cc3303d1d345f5a383309a7dedb2139b9ac GIT binary patch literal 2150 zcmeZI%3@>@ODrqO*DFrWNX<>$CtIylQdy9yWTjM;nw(#hqNJmgmzayeFD^(-1_|aD zrRyaE*%_&N1&Nut`FVO^!_rgpQi~ExQbF3&GE;L>ij}OQt6?U^hq(p?d;0qUC82g@ z=9MVb>L3(&d-}QHmJM(W3h@j-~-QA$ZoODxSP zQL+N*tc|TjPK;nj!2JU;5^gm1Tmnf6@ep6awGRED0L5IeYhb9WpEEEuhx$6X1|iZI zW-0_GG2&GRJA1hLI>vhnLR!|}ip(=`GfG;mWO86TfZAZ3n!Xpl3a z)PZXUg)lzNNO=R83w;p54l@BBX1IbMpXtE(bPe(Y7SjQaAs(21gBGCpOmOoI3Jw8> zGE%yNxs7lThX;8ABZ*2L1C}bWwIbm9F*!daHCd<%m|XHw5{pt8emczX;9hxw>yYT3 z#l7O5E7`s=X>8U1%iuAUl_AJeL4CneNxuKhp`uSz>+Umk?32x&xcXY}o4Na|gq4(> zyr(?o$*(xA<{9MN$fY-F>-Xxb5)(SSl)4jLH@R$TNm~2;{r7}`0tbN$`{#bWcP;tx zw~ylQ^0wbLJ(TLp(B@!n;llv_S8cFkBx@vn(**Ke3 zm|;T4)iW|%e9V-k&2G>1*>tO+sJN(M#;dez3&fW2F^DvGHyk{Wkg$P?xu}5y-8TTr Cp3DXS literal 0 HcmV?d00001 diff --git a/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/manifest/manifest-list-7395f790-699b-4a38-8747-10f23ceba1d6-0 b/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/manifest/manifest-list-7395f790-699b-4a38-8747-10f23ceba1d6-0 new file mode 100644 index 0000000000000000000000000000000000000000..402ee39dd34b01665b1e14415c681e4c3215ed14 GIT binary patch literal 1006 zcmbVLJ5Iwe7zT-%0U=nJydhPQSi70VhzKV^P0}K&$Z`|64a%c(T%=NR4=%vY6*vVW zI|FCHE~N=g#iLsse-D4(Z`=Kw?VSU9%QGzxMl7LkyE7oBV+NXlvmoOkXn>R^wRJwG zf%=*8p+@ERh-qUQC0VM?#fB_poC>DsgNVgp4w}8$mlElqwqw|q++27?sQ^Wz1e_Td z5eu5Agye!p>9CG+jOz+2&Q>s%XA-FENGD3uDfQMOb%c@o2ka$myM`+x5XXgXsrpNK zgw%;^>M;$AbRhPs$L&uA%fF>J;ng2dmWMHE*`8H7We=p2I0mxsMg5d>I+fK#f+cEF zi+wQf$S?c`2-$#6VgZ_pS@=bDSWfF4nUEOz59O*zr(Wv> zi%|#R&1^TNO4qjCtyZ#3``PJgqbqU$NA`>T`Kt@EkOg~pxu9uCdARld937t=zCJy? K+&^{`eDnc5#YBGq literal 0 HcmV?d00001 diff --git a/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/manifest/manifest-list-7395f790-699b-4a38-8747-10f23ceba1d6-1 b/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/manifest/manifest-list-7395f790-699b-4a38-8747-10f23ceba1d6-1 new file mode 100644 index 0000000000000000000000000000000000000000..48db56792018c3a24280f8900ad385988a35c51b GIT binary patch literal 1115 zcmbVLzi!h&9Co9K5hPk6R&HX5QJ^>QvG$baephV}v_I-cu`+j!!=)*?!3I52^dghO4jISS_06rO0 z&;pG5DI=f;5*)8QXJhOaFH<(GV{t#C^)U|PG^tO;hBToJb6U`YkVYf}t;3VwN~jNe zj?uNn=FBThILK=Yz$?Rq$kr^lfMlG7$#50r81G7`C|$zXo`p1BhdL@WorT_=NFAEc zy$AM<)^iP4L_m(KxeL{Ql!wqN@s4^d!$y70^vcIQnsA!^k>1kY-UDTOrit2J&n}(v z5G2P@1VrDn`Y~g8BB}{Fjf+Xm_QABHK|ZJfLIZppanLH5`5-S3%V~F@r6EKAOSwg) zmtOldbmySCOELTvi_*tX1YMged`;Mg=6bg4QiW@;dr+=qp7yrWAOYrUUSJ-Kcl$%HW&CX@zeB=4W_D`vER*_Dt)%P(@!hmL6CUkc{ zXf~U&qU`OFP&A+FCaI1r5Vr A3IG5A literal 0 HcmV?d00001 diff --git a/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/raw_blob/b0-row-0.bin b/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/raw_blob/b0-row-0.bin new file mode 100644 index 000000000..1207296c4 --- /dev/null +++ b/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/raw_blob/b0-row-0.bin @@ -0,0 +1 @@ +img_0 \ No newline at end of file diff --git a/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/raw_blob/b0-row-1.bin b/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/raw_blob/b0-row-1.bin new file mode 100644 index 000000000..a3f12b374 --- /dev/null +++ b/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/raw_blob/b0-row-1.bin @@ -0,0 +1 @@ +img_1 \ No newline at end of file diff --git a/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/raw_blob/b0-row-2.bin b/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/raw_blob/b0-row-2.bin new file mode 100644 index 000000000..c4c94eef0 --- /dev/null +++ b/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/raw_blob/b0-row-2.bin @@ -0,0 +1 @@ +img_2 \ No newline at end of file diff --git a/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/schema/schema-0 b/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/schema/schema-0 new file mode 100644 index 000000000..174973a9a --- /dev/null +++ b/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/schema/schema-0 @@ -0,0 +1,41 @@ +{ + "version" : 3, + "id" : 0, + "fields" : [ { + "id" : 0, + "name" : "f0", + "type" : "INT" + }, { + "id" : 1, + "name" : "b0", + "type" : "BLOB" + }, { + "id" : 2, + "name" : "b1", + "type" : "BLOB" + }, { + "id" : 3, + "name" : "b2", + "type" : "BLOB" + }, { + "id" : 4, + "name" : "b3", + "type" : "BLOB" + } ], + "highestFieldId" : 4, + "partitionKeys" : [ ], + "primaryKeys" : [ ], + "options" : { + "bucket" : "-1", + "row-tracking.enabled" : "true", + "blob-external-storage-path" : "external_blob", + "target-file-size" : "700", + "blob-external-storage-field" : "b1", + "data-evolution.enabled" : "true", + "file-system" : "local", + "manifest.format" : "avro", + "file.format" : "parquet", + "blob-descriptor-field" : "b0,b1" + }, + "timeMillis" : 1781088620905 +} \ No newline at end of file diff --git a/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/snapshot/EARLIEST b/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/snapshot/EARLIEST new file mode 100644 index 000000000..56a6051ca --- /dev/null +++ b/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/snapshot/EARLIEST @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git a/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/snapshot/LATEST b/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/snapshot/LATEST new file mode 100644 index 000000000..56a6051ca --- /dev/null +++ b/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/snapshot/LATEST @@ -0,0 +1 @@ +1 \ No newline at end of file diff --git a/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/snapshot/snapshot-1 b/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/snapshot/snapshot-1 new file mode 100644 index 000000000..99789afc1 --- /dev/null +++ b/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/snapshot/snapshot-1 @@ -0,0 +1,16 @@ +{ + "version" : 3, + "id" : 1, + "schemaId" : 0, + "baseManifestList" : "manifest-list-7395f790-699b-4a38-8747-10f23ceba1d6-0", + "baseManifestListSize" : 1006, + "deltaManifestList" : "manifest-list-7395f790-699b-4a38-8747-10f23ceba1d6-1", + "deltaManifestListSize" : 1115, + "commitUser" : "d30e72bf-d9bb-4de4-a178-f11f2d2f4fa5", + "commitIdentifier" : 0, + "commitKind" : "APPEND", + "timeMillis" : 1781088622173, + "totalRecordCount" : 9, + "deltaRecordCount" : 9, + "nextRowId" : 3 +} \ No newline at end of file From f8da1222d2de56f746820010737451cca586255b Mon Sep 17 00:00:00 2001 From: lxy264173 Date: Wed, 10 Jun 2026 21:26:16 +0800 Subject: [PATCH 2/4] fix test --- test/inte/blob_table_inte_test.cpp | 17 +---------------- .../blob_desc_field_with_external_path/README | 5 ----- .../blob_desc_field_with_external_path/README | 5 ----- 3 files changed, 1 insertion(+), 26 deletions(-) diff --git a/test/inte/blob_table_inte_test.cpp b/test/inte/blob_table_inte_test.cpp index c9a290b89..66ccd0a54 100644 --- a/test/inte/blob_table_inte_test.cpp +++ b/test/inte/blob_table_inte_test.cpp @@ -366,16 +366,6 @@ class BlobTableInteTest : public testing::Test, public ::testing::WithParamInter return PathUtil::JoinPath(rewrite.table_path, relative_blob_path); } } - - std::string file_name = PathUtil::GetName(descriptor_uri); - for (const auto& blob_dir : rewrite.table_relative_blob_dirs) { - std::string candidate = - PathUtil::JoinPath(PathUtil::JoinPath(rewrite.table_path, blob_dir), file_name); - auto exists = fs->Exists(candidate); - if (exists.ok() && exists.value()) { - return candidate; - } - } return std::nullopt; } @@ -2131,12 +2121,7 @@ TEST_P(BlobTableInteTest, TestBlobDescriptorFieldPartialExternalStorageNoAsDescr auto read_concat = arrow::Concatenate(result.chunked_array->chunks()).ValueOrDie(); auto read_struct = std::dynamic_pointer_cast(read_concat); - // After read, b0 and b1 are both descriptor-stored; resolve all back to raw bytes. - // Java-generated descriptors may contain absolute paths from the generation machine. - // Rewrite them to the portable blob directories inside the copied table path. - BlobDescriptorPathRewrite rewrite{table_path, {"raw_blob", "external_blob"}}; - ASSERT_OK_AND_ASSIGN(auto resolved, - ConvertDescriptorToRawBlob(read_struct, {"b0", "b1"}, rewrite)); + ASSERT_OK_AND_ASSIGN(auto resolved, ConvertDescriptorToRawBlob(read_struct, {"b0", "b1"})); ASSERT_OK_AND_ASSIGN(auto expected_with_rk, PrependRowKindColumn(raw_array)); ASSERT_TRUE(resolved->Equals(expected_with_rk)); diff --git a/test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/README b/test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/README index dd0ea5e3a..79f90708b 100644 --- a/test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/README +++ b/test/test_data/orc/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/README @@ -23,11 +23,6 @@ Add: 2, "img_1", "vid_1", "raw_2_1", null Add: 3, "img_2", null, "raw_2_2", "raw_3_2" NoCompact -Expected data files after commit: - [0] main orc file (f0, b0, b1), 3 rows - [1] .blob file (b2), 3 rows - [2] .blob file (b3), 3 rows - C++ read note: Descriptor URIs contain absolute paths from the Java generation machine. ConvertDescriptorToRawBlob uses BlobDescriptorPathRewrite{"raw_blob", "external_blob"} diff --git a/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/README b/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/README index dd0ea5e3a..79f90708b 100644 --- a/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/README +++ b/test/test_data/parquet/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path/README @@ -23,11 +23,6 @@ Add: 2, "img_1", "vid_1", "raw_2_1", null Add: 3, "img_2", null, "raw_2_2", "raw_3_2" NoCompact -Expected data files after commit: - [0] main orc file (f0, b0, b1), 3 rows - [1] .blob file (b2), 3 rows - [2] .blob file (b3), 3 rows - C++ read note: Descriptor URIs contain absolute paths from the Java generation machine. ConvertDescriptorToRawBlob uses BlobDescriptorPathRewrite{"raw_blob", "external_blob"} From 7146202d4faacd02f596f3adcb906675b694c70e Mon Sep 17 00:00:00 2001 From: lxy264173 Date: Thu, 11 Jun 2026 12:01:18 +0800 Subject: [PATCH 3/4] fix a little --- test/inte/blob_table_inte_test.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/test/inte/blob_table_inte_test.cpp b/test/inte/blob_table_inte_test.cpp index 66ccd0a54..edc251049 100644 --- a/test/inte/blob_table_inte_test.cpp +++ b/test/inte/blob_table_inte_test.cpp @@ -2121,6 +2121,7 @@ TEST_P(BlobTableInteTest, TestBlobDescriptorFieldPartialExternalStorageNoAsDescr auto read_concat = arrow::Concatenate(result.chunked_array->chunks()).ValueOrDie(); auto read_struct = std::dynamic_pointer_cast(read_concat); + // After read, b0 and b1 are both descriptor-stored; resolve all back to raw bytes ASSERT_OK_AND_ASSIGN(auto resolved, ConvertDescriptorToRawBlob(read_struct, {"b0", "b1"})); ASSERT_OK_AND_ASSIGN(auto expected_with_rk, PrependRowKindColumn(raw_array)); ASSERT_TRUE(resolved->Equals(expected_with_rk)); From 981e0cc6e4e313c25829b66a1560bf2ecee890b4 Mon Sep 17 00:00:00 2001 From: lxy264173 Date: Thu, 11 Jun 2026 13:16:02 +0800 Subject: [PATCH 4/4] fix test --- test/inte/blob_table_inte_test.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/inte/blob_table_inte_test.cpp b/test/inte/blob_table_inte_test.cpp index edc251049..4cd6ad053 100644 --- a/test/inte/blob_table_inte_test.cpp +++ b/test/inte/blob_table_inte_test.cpp @@ -3049,6 +3049,10 @@ TEST_P(BlobTableInteTest, TestBlobViewWithFallbackPath) { } TEST_P(BlobTableInteTest, TestReadBlobDescriptorFieldFromJava) { + auto file_format = GetParam(); + if (file_format != "orc" && file_format != "parquet") { + return; + } std::string table_path = GetDataDir() + "/" + file_format + "/blob_desc_field_with_external_path.db/blob_desc_field_with_external_path";