From bad10b33a049a30f3972ba19217e78cbe91a2506 Mon Sep 17 00:00:00 2001 From: lxy264173 Date: Fri, 5 Jun 2026 15:19:37 +0800 Subject: [PATCH 1/4] fix: Avoid signed overflow UB in BSI index and null-deref in bloom filter index --- .../bloomfilter/bloom_filter_file_index.cpp | 10 ++++++-- .../bsi/bit_slice_index_bitmap_file_index.cpp | 25 ++++++++++++++----- 2 files changed, 27 insertions(+), 8 deletions(-) diff --git a/src/paimon/common/file_index/bloomfilter/bloom_filter_file_index.cpp b/src/paimon/common/file_index/bloomfilter/bloom_filter_file_index.cpp index 157361f32..06cf57343 100644 --- a/src/paimon/common/file_index/bloomfilter/bloom_filter_file_index.cpp +++ b/src/paimon/common/file_index/bloomfilter/bloom_filter_file_index.cpp @@ -78,9 +78,15 @@ BloomFilterFileIndexReader::BloomFilterFileIndexReader(const FastHash::HashFunct Result> BloomFilterFileIndexReader::VisitEqual( const Literal& literal) { + // This returns `Remain` to align with the current Java implementation in BF index, even though + // its predicate semantics are inconsistent here. In practice, equality tests in predicate + // evaluation always return false when the literal is null. See + // `null_false_leaf_binary_function.h`. + if (literal.IsNull()) { + return FileIndexResult::Remain(); + } int64_t hash = hash_function_(literal); - return literal.IsNull() || filter_.TestHash(hash) ? FileIndexResult::Remain() - : FileIndexResult::Skip(); + return filter_.TestHash(hash) ? FileIndexResult::Remain() : FileIndexResult::Skip(); } } // namespace paimon diff --git a/src/paimon/common/file_index/bsi/bit_slice_index_bitmap_file_index.cpp b/src/paimon/common/file_index/bsi/bit_slice_index_bitmap_file_index.cpp index 0bbd993ec..062700898 100644 --- a/src/paimon/common/file_index/bsi/bit_slice_index_bitmap_file_index.cpp +++ b/src/paimon/common/file_index/bsi/bit_slice_index_bitmap_file_index.cpp @@ -17,7 +17,9 @@ #include "paimon/common/file_index/bsi/bit_slice_index_bitmap_file_index.h" #include +#include #include +#include #include "fmt/format.h" #include "paimon/common/file_index/bsi/bit_slice_index_roaring_bitmap.h" @@ -31,7 +33,17 @@ #include "paimon/io/data_input_stream.h" #include "paimon/memory/bytes.h" #include "paimon/utils/roaring_bitmap32.h" +namespace { +// Safe absolute value for int64_t that avoids undefined behavior when value == INT64_MIN. +// This mirrors Java's Math.abs() wrapping semantics but produces the correct magnitude. +inline int64_t SafeAbs(int64_t value) { + if (value == INT64_MIN) { + return INT64_MIN; + } + return value < 0 ? -value : value; +} +} // namespace namespace paimon { class MemoryPool; @@ -156,7 +168,7 @@ Result> BitSliceIndexBitmapFileIndexReader::Vis if (value >= 0) { return reader->positive_->GreaterThan(value); } else { - PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 b1, reader->negative_->LessThan(-value)); + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 b1, reader->negative_->LessThan(SafeAbs(value))); RoaringBitmap32 b2 = reader->positive_->IsNotNull(); b1 |= b2; return b1; @@ -173,7 +185,8 @@ Result> BitSliceIndexBitmapFileIndexReader::Vis if (value >= 0) { return reader->positive_->GreaterOrEqual(value); } else { - PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 b1, reader->negative_->LessOrEqual(-value)); + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 b1, + reader->negative_->LessOrEqual(SafeAbs(value))); RoaringBitmap32 b2 = reader->positive_->IsNotNull(); b1 |= b2; return b1; @@ -188,7 +201,7 @@ Result> BitSliceIndexBitmapFileIndexReader::Vis [literal = literal, reader = shared_from_this()]() -> Result { PAIMON_ASSIGN_OR_RAISE(int64_t value, reader->value_mapper_(literal)); if (value < 0) { - return reader->negative_->GreaterThan(-value); + return reader->negative_->GreaterThan(SafeAbs(value)); } else { PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 b1, reader->positive_->LessThan(value)); RoaringBitmap32 b2 = reader->negative_->IsNotNull(); @@ -204,7 +217,7 @@ Result> BitSliceIndexBitmapFileIndexReader::Vis [literal = literal, reader = shared_from_this()]() -> Result { PAIMON_ASSIGN_OR_RAISE(int64_t value, reader->value_mapper_(literal)); if (value < 0) { - return reader->negative_->GreaterOrEqual(-value); + return reader->negative_->GreaterOrEqual(SafeAbs(value)); } else { PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 b1, reader->positive_->LessOrEqual(value)); RoaringBitmap32 b2 = reader->negative_->IsNotNull(); @@ -234,7 +247,7 @@ Result> BitSliceIndexBitmapFileIndexReader::Vis PAIMON_ASSIGN_OR_RAISE(int64_t value, reader->value_mapper_(literal)); RoaringBitmap32 equal; if (value < 0) { - PAIMON_ASSIGN_OR_RAISE(equal, reader->negative_->Equal(-value)); + PAIMON_ASSIGN_OR_RAISE(equal, reader->negative_->Equal(SafeAbs(value))); } else { PAIMON_ASSIGN_OR_RAISE(equal, reader->positive_->Equal(value)); } @@ -257,7 +270,7 @@ Result> BitSliceIndexBitmapFileIndexReader::Vis PAIMON_ASSIGN_OR_RAISE(int64_t value, reader->value_mapper_(literal)); RoaringBitmap32 equal; if (value < 0) { - PAIMON_ASSIGN_OR_RAISE(equal, reader->negative_->Equal(-value)); + PAIMON_ASSIGN_OR_RAISE(equal, reader->negative_->Equal(SafeAbs(value))); } else { PAIMON_ASSIGN_OR_RAISE(equal, reader->positive_->Equal(value)); } From 94628c5399e1b556421b3880689d9b3d4fb6808a Mon Sep 17 00:00:00 2001 From: lxy264173 Date: Mon, 8 Jun 2026 09:53:51 +0800 Subject: [PATCH 2/4] fix bsi with visit INT64_MIN --- .../bsi/bit_slice_index_bitmap_file_index.cpp | 50 +++++++++++++------ ...bit_slice_index_bitmap_file_index_test.cpp | 46 +++++++++++++++++ 2 files changed, 82 insertions(+), 14 deletions(-) diff --git a/src/paimon/common/file_index/bsi/bit_slice_index_bitmap_file_index.cpp b/src/paimon/common/file_index/bsi/bit_slice_index_bitmap_file_index.cpp index 062700898..09233cd1c 100644 --- a/src/paimon/common/file_index/bsi/bit_slice_index_bitmap_file_index.cpp +++ b/src/paimon/common/file_index/bsi/bit_slice_index_bitmap_file_index.cpp @@ -165,7 +165,11 @@ Result> BitSliceIndexBitmapFileIndexReader::Vis BitmapIndexResult::BitmapSupplier bitmap_supplier = [literal = literal, reader = shared_from_this()]() -> Result { PAIMON_ASSIGN_OR_RAISE(int64_t value, reader->value_mapper_(literal)); - if (value >= 0) { + if (value == INT64_MIN) { + // Everything is greater than INT64_MIN (writer cannot store it) + return RoaringBitmap32::Or(reader->positive_->IsNotNull(), + reader->negative_->IsNotNull()); + } else if (value >= 0) { return reader->positive_->GreaterThan(value); } else { PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 b1, reader->negative_->LessThan(SafeAbs(value))); @@ -182,7 +186,11 @@ Result> BitSliceIndexBitmapFileIndexReader::Vis BitmapIndexResult::BitmapSupplier bitmap_supplier = [literal = literal, reader = shared_from_this()]() -> Result { PAIMON_ASSIGN_OR_RAISE(int64_t value, reader->value_mapper_(literal)); - if (value >= 0) { + if (value == INT64_MIN) { + // All non-null rows satisfy x >= INT64_MIN + return RoaringBitmap32::Or(reader->positive_->IsNotNull(), + reader->negative_->IsNotNull()); + } else if (value >= 0) { return reader->positive_->GreaterOrEqual(value); } else { PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 b1, @@ -200,7 +208,10 @@ Result> BitSliceIndexBitmapFileIndexReader::Vis BitmapIndexResult::BitmapSupplier bitmap_supplier = [literal = literal, reader = shared_from_this()]() -> Result { PAIMON_ASSIGN_OR_RAISE(int64_t value, reader->value_mapper_(literal)); - if (value < 0) { + if (value == INT64_MIN) { + // Nothing is less than INT64_MIN + return RoaringBitmap32(); + } else if (value < 0) { return reader->negative_->GreaterThan(SafeAbs(value)); } else { PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 b1, reader->positive_->LessThan(value)); @@ -216,7 +227,10 @@ Result> BitSliceIndexBitmapFileIndexReader::Vis BitmapIndexResult::BitmapSupplier bitmap_supplier = [literal = literal, reader = shared_from_this()]() -> Result { PAIMON_ASSIGN_OR_RAISE(int64_t value, reader->value_mapper_(literal)); - if (value < 0) { + if (value == INT64_MIN) { + // Writer cannot store INT64_MIN, so no row can match + return RoaringBitmap32(); + } else if (value < 0) { return reader->negative_->GreaterOrEqual(SafeAbs(value)); } else { PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 b1, reader->positive_->LessOrEqual(value)); @@ -245,13 +259,17 @@ Result> BitSliceIndexBitmapFileIndexReader::Vis result_bitmaps.reserve(literals.size()); for (const auto& literal : literals) { PAIMON_ASSIGN_OR_RAISE(int64_t value, reader->value_mapper_(literal)); - RoaringBitmap32 equal; - if (value < 0) { - PAIMON_ASSIGN_OR_RAISE(equal, reader->negative_->Equal(SafeAbs(value))); + if (value == INT64_MIN) { + // Writer cannot store INT64_MIN, so no row can match it + result_bitmaps.emplace_back(RoaringBitmap32()); + } else if (value < 0) { + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 equal, + reader->negative_->Equal(SafeAbs(value))); + result_bitmaps.emplace_back(std::move(equal)); } else { - PAIMON_ASSIGN_OR_RAISE(equal, reader->positive_->Equal(value)); + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 equal, reader->positive_->Equal(value)); + result_bitmaps.emplace_back(std::move(equal)); } - result_bitmaps.emplace_back(std::move(equal)); } return RoaringBitmap32::FastUnion(result_bitmaps); }; @@ -268,13 +286,17 @@ Result> BitSliceIndexBitmapFileIndexReader::Vis result_bitmaps.reserve(literals.size()); for (const auto& literal : literals) { PAIMON_ASSIGN_OR_RAISE(int64_t value, reader->value_mapper_(literal)); - RoaringBitmap32 equal; - if (value < 0) { - PAIMON_ASSIGN_OR_RAISE(equal, reader->negative_->Equal(SafeAbs(value))); + if (value == INT64_MIN) { + // Writer cannot store INT64_MIN, so no row can match it + result_bitmaps.emplace_back(RoaringBitmap32()); + } else if (value < 0) { + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 equal, + reader->negative_->Equal(SafeAbs(value))); + result_bitmaps.emplace_back(std::move(equal)); } else { - PAIMON_ASSIGN_OR_RAISE(equal, reader->positive_->Equal(value)); + PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 equal, reader->positive_->Equal(value)); + result_bitmaps.emplace_back(std::move(equal)); } - result_bitmaps.emplace_back(std::move(equal)); } auto in = RoaringBitmap32::FastUnion(result_bitmaps); ebm -= in; diff --git a/src/paimon/common/file_index/bsi/bit_slice_index_bitmap_file_index_test.cpp b/src/paimon/common/file_index/bsi/bit_slice_index_bitmap_file_index_test.cpp index cc873dbf6..e96b7b688 100644 --- a/src/paimon/common/file_index/bsi/bit_slice_index_bitmap_file_index_test.cpp +++ b/src/paimon/common/file_index/bsi/bit_slice_index_bitmap_file_index_test.cpp @@ -16,6 +16,7 @@ #include "paimon/common/file_index/bsi/bit_slice_index_bitmap_file_index.h" +#include #include #include "gtest/gtest.h" @@ -385,4 +386,49 @@ TEST_F(BitSliceIndexBitmapIndexReaderTest, TestUnInvalidType) { "BitSliceIndexBitmapFileIndex only support TINYINT/SMALLINT/INT/BIGINT/DATE"); } +TEST_F(BitSliceIndexBitmapIndexReaderTest, TestReaderPredicatePruningWithInt64Min) { + // Reuse TestPrimitiveType's index bytes. + // data: null, 1, null, 2, -1 (non-null rows: {1, 3, 4}) + std::vector index_bytes = { + 1, 0, 0, 0, 5, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 58, + 48, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 16, 0, 0, 0, 1, 0, 3, 0, 0, 0, 0, 2, 58, + 48, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 1, 0, 58, 48, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 16, 0, 0, 0, 3, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 1, 58, 48, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 4, 0, 0, + 0, 0, 1, 58, 48, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 4, 0}; + auto input_stream = + std::make_shared(index_bytes.data(), index_bytes.size()); + BitSliceIndexBitmapFileIndex file_index({}); + ASSERT_OK_AND_ASSIGN( + auto reader, + file_index.CreateReader(CreateArrowSchema(arrow::int64()).get(), + /*start=*/0, /*length=*/index_bytes.size(), input_stream, pool_)); + + // x > INT64_MIN: all non-null rows (writer cannot store INT64_MIN) + CheckResult(reader->VisitGreaterThan(Literal(INT64_MIN)).value(), {1, 3, 4}); + + // x >= INT64_MIN: all non-null rows + CheckResult(reader->VisitGreaterOrEqual(Literal(INT64_MIN)).value(), {1, 3, 4}); + + // x < INT64_MIN: empty + CheckResult(reader->VisitLessThan(Literal(INT64_MIN)).value(), {}); + + // x <= INT64_MIN: empty (no row has INT64_MIN) + CheckResult(reader->VisitLessOrEqual(Literal(INT64_MIN)).value(), {}); + + // x == INT64_MIN: empty + CheckResult(reader->VisitEqual(Literal(INT64_MIN)).value(), {}); + + // x != INT64_MIN: all non-null rows + CheckResult(reader->VisitNotEqual(Literal(INT64_MIN)).value(), {1, 3, 4}); + + // x IN (INT64_MIN, -1): only -1 matches → row 4 + CheckResult(reader->VisitIn({Literal(INT64_MIN), Literal(static_cast(-1))}).value(), + {4}); + + // x NOT IN (INT64_MIN, -1): all non-null except row 4 + CheckResult(reader->VisitNotIn({Literal(INT64_MIN), Literal(static_cast(-1))}).value(), + {1, 3}); +} + } // namespace paimon::test From 85fd8166096c8f166e4e69135ffd944f910c0e1f Mon Sep 17 00:00:00 2001 From: lxy264173 Date: Mon, 8 Jun 2026 10:28:14 +0800 Subject: [PATCH 3/4] fix clang tidy --- .../file_index/bsi/bit_slice_index_bitmap_file_index.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/paimon/common/file_index/bsi/bit_slice_index_bitmap_file_index.cpp b/src/paimon/common/file_index/bsi/bit_slice_index_bitmap_file_index.cpp index 09233cd1c..d40788a4f 100644 --- a/src/paimon/common/file_index/bsi/bit_slice_index_bitmap_file_index.cpp +++ b/src/paimon/common/file_index/bsi/bit_slice_index_bitmap_file_index.cpp @@ -261,7 +261,7 @@ Result> BitSliceIndexBitmapFileIndexReader::Vis PAIMON_ASSIGN_OR_RAISE(int64_t value, reader->value_mapper_(literal)); if (value == INT64_MIN) { // Writer cannot store INT64_MIN, so no row can match it - result_bitmaps.emplace_back(RoaringBitmap32()); + result_bitmaps.emplace_back(); } else if (value < 0) { PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 equal, reader->negative_->Equal(SafeAbs(value))); @@ -288,7 +288,7 @@ Result> BitSliceIndexBitmapFileIndexReader::Vis PAIMON_ASSIGN_OR_RAISE(int64_t value, reader->value_mapper_(literal)); if (value == INT64_MIN) { // Writer cannot store INT64_MIN, so no row can match it - result_bitmaps.emplace_back(RoaringBitmap32()); + result_bitmaps.emplace_back(); } else if (value < 0) { PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 equal, reader->negative_->Equal(SafeAbs(value))); From d80c7e46810e8ab814a02fbc433038a2aefb62e4 Mon Sep 17 00:00:00 2001 From: lxy264173 Date: Mon, 8 Jun 2026 10:34:35 +0800 Subject: [PATCH 4/4] fix comments --- .../common/file_index/bsi/bit_slice_index_bitmap_file_index.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/paimon/common/file_index/bsi/bit_slice_index_bitmap_file_index.cpp b/src/paimon/common/file_index/bsi/bit_slice_index_bitmap_file_index.cpp index d40788a4f..d71262cc2 100644 --- a/src/paimon/common/file_index/bsi/bit_slice_index_bitmap_file_index.cpp +++ b/src/paimon/common/file_index/bsi/bit_slice_index_bitmap_file_index.cpp @@ -35,7 +35,7 @@ #include "paimon/utils/roaring_bitmap32.h" namespace { // Safe absolute value for int64_t that avoids undefined behavior when value == INT64_MIN. -// This mirrors Java's Math.abs() wrapping semantics but produces the correct magnitude. +// This mirrors Java's Math.abs() wrapping semantics. inline int64_t SafeAbs(int64_t value) { if (value == INT64_MIN) { return INT64_MIN;