From 8bd355b4dbdf4c660485733c25b59f2598329d74 Mon Sep 17 00:00:00 2001 From: "joey.ljy" Date: Wed, 3 Jun 2026 23:53:02 +0800 Subject: [PATCH 1/3] Allow FileStoreCommit for PK tables with postpone bucket mode Postpone bucket mode (bucket=-2) writes data like an append table: all files go to bucket--2/ directory and the REST catalog server handles bucket redistribution during background compaction. The commit logic (manifest and snapshot generation) is identical to append tables, so there is no reason to block it. See: https://paimon.apache.org/docs/master/primary-key-table/data-distribution/#postpone-bucket Co-Authored-By: Claude Opus 4.6 (1M context) --- .../core/operation/file_store_commit.cpp | 13 ++++- .../operation/file_store_commit_impl_test.cpp | 57 +++++++++++++++++++ 2 files changed, 69 insertions(+), 1 deletion(-) diff --git a/src/paimon/core/operation/file_store_commit.cpp b/src/paimon/core/operation/file_store_commit.cpp index 9d78b74b..a1998954 100644 --- a/src/paimon/core/operation/file_store_commit.cpp +++ b/src/paimon/core/operation/file_store_commit.cpp @@ -31,6 +31,7 @@ #include "paimon/core/operation/file_store_commit_impl.h" #include "paimon/core/schema/schema_manager.h" #include "paimon/core/schema/table_schema.h" +#include "paimon/core/table/bucket_mode.h" #include "paimon/core/utils/field_mapping.h" #include "paimon/core/utils/file_store_path_factory.h" #include "paimon/core/utils/snapshot_manager.h" @@ -68,7 +69,17 @@ Result> FileStoreCommit::Create( const auto& schema = table_schema.value(); if (!schema->PrimaryKeys().empty() && ctx->GetOptions().find("enable-pk-commit-in-inte-test") == ctx->GetOptions().end()) { - return Status::NotImplemented("not support pk table commit yet"); + // Postpone bucket mode (bucket=-2) writes data like an append table: all files go to + // bucket-postpone/ directory and the REST catalog server handles bucket redistribution + // during compaction. The commit logic is the same as append tables, so we allow it. + auto schema_opts = schema->Options(); + auto bucket_it = schema_opts.find("bucket"); + bool is_postpone_bucket = + bucket_it != schema_opts.end() && + bucket_it->second == std::to_string(BucketModeDefine::POSTPONE_BUCKET); + if (!is_postpone_bucket) { + return Status::NotImplemented("not support pk table commit yet"); + } } auto opts = schema->Options(); for (const auto& [key, value] : ctx->GetOptions()) { diff --git a/src/paimon/core/operation/file_store_commit_impl_test.cpp b/src/paimon/core/operation/file_store_commit_impl_test.cpp index a3154bfc..f5910f0c 100644 --- a/src/paimon/core/operation/file_store_commit_impl_test.cpp +++ b/src/paimon/core/operation/file_store_commit_impl_test.cpp @@ -1688,4 +1688,61 @@ TEST_F(FileStoreCommitImplTest, TestObjectStoreAllowedWithRESTCatalogCommit) { ASSERT_FALSE(json.empty()); } +// Verify that FileStoreCommit::Create succeeds for PK tables with postpone bucket mode (bucket=-2) +// without requiring the enable-pk-commit-in-inte-test workaround flag. +TEST_F(FileStoreCommitImplTest, TestPostponeBucketPKTableCommitAllowed) { + auto pk_dir = UniqueTestDirectory::Create(); + ASSERT_TRUE(pk_dir); + std::string pk_root = pk_dir->Str(); + ASSERT_OK_AND_ASSIGN(auto catalog, Catalog::Create(pk_root, {})); + ASSERT_OK(catalog->CreateDatabase("db", {}, false)); + + arrow::Schema pk_schema( + {arrow::field("pk", arrow::int32()), arrow::field("val", arrow::utf8())}); + ::ArrowSchema arrow_schema; + ASSERT_TRUE(arrow::ExportSchema(pk_schema, &arrow_schema).ok()); + std::map table_options = {{"bucket", "-2"}}; + ASSERT_OK(catalog->CreateTable(Identifier("db", "pk_tbl"), &arrow_schema, + /*partition_keys=*/{}, /*primary_keys=*/{"pk"}, table_options, + /*ignore_if_exists=*/false)); + + std::string pk_table_path = PathUtil::JoinPath(pk_root, "db.db/pk_tbl"); + + // Create FileStoreCommit WITHOUT the workaround flag — should succeed for postpone bucket + CommitContextBuilder builder(pk_table_path, "test_user"); + builder.AddOption(Options::FILE_SYSTEM, "local").UseRESTCatalogCommit(true); + ASSERT_OK_AND_ASSIGN(auto commit_context, builder.Finish()); + ASSERT_OK_AND_ASSIGN(auto committer, FileStoreCommit::Create(std::move(commit_context))); + ASSERT_TRUE(committer != nullptr); +} + +// Verify that FileStoreCommit::Create still rejects PK tables with fixed bucket (bucket > 0) +// when the workaround flag is not set. +TEST_F(FileStoreCommitImplTest, TestFixedBucketPKTableCommitRejected) { + auto pk_dir = UniqueTestDirectory::Create(); + ASSERT_TRUE(pk_dir); + std::string pk_root = pk_dir->Str(); + ASSERT_OK_AND_ASSIGN(auto catalog, Catalog::Create(pk_root, {})); + ASSERT_OK(catalog->CreateDatabase("db", {}, false)); + + arrow::Schema pk_schema( + {arrow::field("pk", arrow::int32()), arrow::field("val", arrow::utf8())}); + ::ArrowSchema arrow_schema; + ASSERT_TRUE(arrow::ExportSchema(pk_schema, &arrow_schema).ok()); + std::map table_options = {{"bucket", "4"}}; + ASSERT_OK(catalog->CreateTable(Identifier("db", "pk_tbl_fixed"), &arrow_schema, + /*partition_keys=*/{}, /*primary_keys=*/{"pk"}, table_options, + /*ignore_if_exists=*/false)); + + std::string pk_table_path = PathUtil::JoinPath(pk_root, "db.db/pk_tbl_fixed"); + + CommitContextBuilder builder(pk_table_path, "test_user"); + builder.AddOption(Options::FILE_SYSTEM, "local").UseRESTCatalogCommit(true); + ASSERT_OK_AND_ASSIGN(auto commit_context, builder.Finish()); + auto result = FileStoreCommit::Create(std::move(commit_context)); + ASSERT_FALSE(result.ok()); + ASSERT_TRUE(result.status().ToString().find("not support pk table commit") != + std::string::npos); +} + } // namespace paimon::test From ee8799f890fe29a0e412490c7d16adff5fb9a285 Mon Sep 17 00:00:00 2001 From: Joey Date: Sat, 6 Jun 2026 23:38:05 +0800 Subject: [PATCH 2/3] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- src/paimon/core/operation/file_store_commit.cpp | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/src/paimon/core/operation/file_store_commit.cpp b/src/paimon/core/operation/file_store_commit.cpp index a1998954..a7675c04 100644 --- a/src/paimon/core/operation/file_store_commit.cpp +++ b/src/paimon/core/operation/file_store_commit.cpp @@ -69,15 +69,10 @@ Result> FileStoreCommit::Create( const auto& schema = table_schema.value(); if (!schema->PrimaryKeys().empty() && ctx->GetOptions().find("enable-pk-commit-in-inte-test") == ctx->GetOptions().end()) { - // Postpone bucket mode (bucket=-2) writes data like an append table: all files go to - // bucket-postpone/ directory and the REST catalog server handles bucket redistribution - // during compaction. The commit logic is the same as append tables, so we allow it. - auto schema_opts = schema->Options(); - auto bucket_it = schema_opts.find("bucket"); - bool is_postpone_bucket = - bucket_it != schema_opts.end() && - bucket_it->second == std::to_string(BucketModeDefine::POSTPONE_BUCKET); - if (!is_postpone_bucket) { + // Postpone bucket mode (bucket=-2) writes all data files to the bucket-postpone/ directory. + // A compaction job will later redistribute files into real buckets. The commit logic + // (manifest and snapshot generation) is the same as append tables, so we allow it. + if (schema->NumBuckets() != BucketModeDefine::POSTPONE_BUCKET) { return Status::NotImplemented("not support pk table commit yet"); } } From f9c9267407af8118d4d6fe48973ddf9ba6b7d89e Mon Sep 17 00:00:00 2001 From: "joey.ljy" Date: Sat, 6 Jun 2026 23:45:07 +0800 Subject: [PATCH 3/3] Address review feedback: use NumBuckets() and Options::BUCKET constant - Replace raw schema options map lookup with TableSchema::NumBuckets() for postpone bucket check - Use Options::BUCKET constant instead of hardcoded "bucket" in tests - Add IsNotImplemented() status kind assertion in rejection test Co-Authored-By: Claude Opus 4.6 (1M context) --- src/paimon/core/operation/file_store_commit_impl_test.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/paimon/core/operation/file_store_commit_impl_test.cpp b/src/paimon/core/operation/file_store_commit_impl_test.cpp index f5910f0c..b8dda6a9 100644 --- a/src/paimon/core/operation/file_store_commit_impl_test.cpp +++ b/src/paimon/core/operation/file_store_commit_impl_test.cpp @@ -1701,7 +1701,7 @@ TEST_F(FileStoreCommitImplTest, TestPostponeBucketPKTableCommitAllowed) { {arrow::field("pk", arrow::int32()), arrow::field("val", arrow::utf8())}); ::ArrowSchema arrow_schema; ASSERT_TRUE(arrow::ExportSchema(pk_schema, &arrow_schema).ok()); - std::map table_options = {{"bucket", "-2"}}; + std::map table_options = {{Options::BUCKET, "-2"}}; ASSERT_OK(catalog->CreateTable(Identifier("db", "pk_tbl"), &arrow_schema, /*partition_keys=*/{}, /*primary_keys=*/{"pk"}, table_options, /*ignore_if_exists=*/false)); @@ -1729,7 +1729,7 @@ TEST_F(FileStoreCommitImplTest, TestFixedBucketPKTableCommitRejected) { {arrow::field("pk", arrow::int32()), arrow::field("val", arrow::utf8())}); ::ArrowSchema arrow_schema; ASSERT_TRUE(arrow::ExportSchema(pk_schema, &arrow_schema).ok()); - std::map table_options = {{"bucket", "4"}}; + std::map table_options = {{Options::BUCKET, "4"}}; ASSERT_OK(catalog->CreateTable(Identifier("db", "pk_tbl_fixed"), &arrow_schema, /*partition_keys=*/{}, /*primary_keys=*/{"pk"}, table_options, /*ignore_if_exists=*/false)); @@ -1741,6 +1741,7 @@ TEST_F(FileStoreCommitImplTest, TestFixedBucketPKTableCommitRejected) { ASSERT_OK_AND_ASSIGN(auto commit_context, builder.Finish()); auto result = FileStoreCommit::Create(std::move(commit_context)); ASSERT_FALSE(result.ok()); + ASSERT_TRUE(result.status().IsNotImplemented()); ASSERT_TRUE(result.status().ToString().find("not support pk table commit") != std::string::npos); }