Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -78,9 +78,15 @@ BloomFilterFileIndexReader::BloomFilterFileIndexReader(const FastHash::HashFunct

Result<std::shared_ptr<FileIndexResult>> BloomFilterFileIndexReader::VisitEqual(
const Literal& literal) {
// This returns `Remain` to align with the current Java implementation in BF index, even though
// its predicate semantics are inconsistent here. In practice, equality tests in predicate
// evaluation always return false when the literal is null. See
// `null_false_leaf_binary_function.h`.
if (literal.IsNull()) {
return FileIndexResult::Remain();
}
int64_t hash = hash_function_(literal);
return literal.IsNull() || filter_.TestHash(hash) ? FileIndexResult::Remain()
: FileIndexResult::Skip();
return filter_.TestHash(hash) ? FileIndexResult::Remain() : FileIndexResult::Skip();
}

} // namespace paimon
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
#include "paimon/common/file_index/bsi/bit_slice_index_bitmap_file_index.h"

#include <cassert>
#include <climits>
#include <cstddef>
#include <cstdint>

#include "fmt/format.h"
#include "paimon/common/file_index/bsi/bit_slice_index_roaring_bitmap.h"
Expand All @@ -31,7 +33,17 @@
#include "paimon/io/data_input_stream.h"
#include "paimon/memory/bytes.h"
#include "paimon/utils/roaring_bitmap32.h"
namespace {
// Safe absolute value for int64_t that avoids undefined behavior when value == INT64_MIN.
// This mirrors Java's Math.abs() wrapping semantics.
inline int64_t SafeAbs(int64_t value) {
if (value == INT64_MIN) {
return INT64_MIN;
}
return value < 0 ? -value : value;
}
Comment thread
lxy-9602 marked this conversation as resolved.

} // namespace
namespace paimon {
class MemoryPool;

Expand Down Expand Up @@ -153,10 +165,14 @@ Result<std::shared_ptr<FileIndexResult>> BitSliceIndexBitmapFileIndexReader::Vis
BitmapIndexResult::BitmapSupplier bitmap_supplier =
[literal = literal, reader = shared_from_this()]() -> Result<RoaringBitmap32> {
PAIMON_ASSIGN_OR_RAISE(int64_t value, reader->value_mapper_(literal));
if (value >= 0) {
if (value == INT64_MIN) {
// Everything is greater than INT64_MIN (writer cannot store it)
return RoaringBitmap32::Or(reader->positive_->IsNotNull(),
reader->negative_->IsNotNull());
} else if (value >= 0) {
return reader->positive_->GreaterThan(value);
} else {
PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 b1, reader->negative_->LessThan(-value));
PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 b1, reader->negative_->LessThan(SafeAbs(value)));
RoaringBitmap32 b2 = reader->positive_->IsNotNull();
b1 |= b2;
return b1;
Comment thread
lxy-9602 marked this conversation as resolved.
Expand All @@ -170,10 +186,15 @@ Result<std::shared_ptr<FileIndexResult>> BitSliceIndexBitmapFileIndexReader::Vis
BitmapIndexResult::BitmapSupplier bitmap_supplier =
[literal = literal, reader = shared_from_this()]() -> Result<RoaringBitmap32> {
PAIMON_ASSIGN_OR_RAISE(int64_t value, reader->value_mapper_(literal));
if (value >= 0) {
if (value == INT64_MIN) {
// All non-null rows satisfy x >= INT64_MIN
return RoaringBitmap32::Or(reader->positive_->IsNotNull(),
reader->negative_->IsNotNull());
} else if (value >= 0) {
return reader->positive_->GreaterOrEqual(value);
} else {
PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 b1, reader->negative_->LessOrEqual(-value));
PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 b1,
reader->negative_->LessOrEqual(SafeAbs(value)));
RoaringBitmap32 b2 = reader->positive_->IsNotNull();
b1 |= b2;
return b1;
Comment thread
lxy-9602 marked this conversation as resolved.
Expand All @@ -187,8 +208,11 @@ Result<std::shared_ptr<FileIndexResult>> BitSliceIndexBitmapFileIndexReader::Vis
BitmapIndexResult::BitmapSupplier bitmap_supplier =
[literal = literal, reader = shared_from_this()]() -> Result<RoaringBitmap32> {
PAIMON_ASSIGN_OR_RAISE(int64_t value, reader->value_mapper_(literal));
if (value < 0) {
return reader->negative_->GreaterThan(-value);
if (value == INT64_MIN) {
// Nothing is less than INT64_MIN
return RoaringBitmap32();
} else if (value < 0) {
return reader->negative_->GreaterThan(SafeAbs(value));
} else {
Comment thread
lxy-9602 marked this conversation as resolved.
PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 b1, reader->positive_->LessThan(value));
RoaringBitmap32 b2 = reader->negative_->IsNotNull();
Expand All @@ -203,8 +227,11 @@ Result<std::shared_ptr<FileIndexResult>> BitSliceIndexBitmapFileIndexReader::Vis
BitmapIndexResult::BitmapSupplier bitmap_supplier =
[literal = literal, reader = shared_from_this()]() -> Result<RoaringBitmap32> {
PAIMON_ASSIGN_OR_RAISE(int64_t value, reader->value_mapper_(literal));
if (value < 0) {
return reader->negative_->GreaterOrEqual(-value);
if (value == INT64_MIN) {
// Writer cannot store INT64_MIN, so no row can match
return RoaringBitmap32();
} else if (value < 0) {
return reader->negative_->GreaterOrEqual(SafeAbs(value));
} else {
Comment thread
lxy-9602 marked this conversation as resolved.
PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 b1, reader->positive_->LessOrEqual(value));
RoaringBitmap32 b2 = reader->negative_->IsNotNull();
Expand Down Expand Up @@ -232,13 +259,17 @@ Result<std::shared_ptr<FileIndexResult>> BitSliceIndexBitmapFileIndexReader::Vis
result_bitmaps.reserve(literals.size());
for (const auto& literal : literals) {
PAIMON_ASSIGN_OR_RAISE(int64_t value, reader->value_mapper_(literal));
RoaringBitmap32 equal;
if (value < 0) {
PAIMON_ASSIGN_OR_RAISE(equal, reader->negative_->Equal(-value));
if (value == INT64_MIN) {
// Writer cannot store INT64_MIN, so no row can match it
result_bitmaps.emplace_back();
} else if (value < 0) {
PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 equal,
reader->negative_->Equal(SafeAbs(value)));
result_bitmaps.emplace_back(std::move(equal));
} else {
PAIMON_ASSIGN_OR_RAISE(equal, reader->positive_->Equal(value));
PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 equal, reader->positive_->Equal(value));
result_bitmaps.emplace_back(std::move(equal));
}
result_bitmaps.emplace_back(std::move(equal));
}
return RoaringBitmap32::FastUnion(result_bitmaps);
};
Expand All @@ -255,13 +286,17 @@ Result<std::shared_ptr<FileIndexResult>> BitSliceIndexBitmapFileIndexReader::Vis
result_bitmaps.reserve(literals.size());
for (const auto& literal : literals) {
PAIMON_ASSIGN_OR_RAISE(int64_t value, reader->value_mapper_(literal));
RoaringBitmap32 equal;
if (value < 0) {
PAIMON_ASSIGN_OR_RAISE(equal, reader->negative_->Equal(-value));
if (value == INT64_MIN) {
// Writer cannot store INT64_MIN, so no row can match it
result_bitmaps.emplace_back();
} else if (value < 0) {
PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 equal,
reader->negative_->Equal(SafeAbs(value)));
result_bitmaps.emplace_back(std::move(equal));
} else {
PAIMON_ASSIGN_OR_RAISE(equal, reader->positive_->Equal(value));
PAIMON_ASSIGN_OR_RAISE(RoaringBitmap32 equal, reader->positive_->Equal(value));
result_bitmaps.emplace_back(std::move(equal));
}
result_bitmaps.emplace_back(std::move(equal));
}
auto in = RoaringBitmap32::FastUnion(result_bitmaps);
ebm -= in;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include "paimon/common/file_index/bsi/bit_slice_index_bitmap_file_index.h"

#include <cstdint>
#include <utility>

#include "gtest/gtest.h"
Expand Down Expand Up @@ -385,4 +386,49 @@ TEST_F(BitSliceIndexBitmapIndexReaderTest, TestUnInvalidType) {
"BitSliceIndexBitmapFileIndex only support TINYINT/SMALLINT/INT/BIGINT/DATE");
}

TEST_F(BitSliceIndexBitmapIndexReaderTest, TestReaderPredicatePruningWithInt64Min) {
// Reuse TestPrimitiveType's index bytes.
// data: null, 1, null, 2, -1 (non-null rows: {1, 3, 4})
std::vector<char> index_bytes = {
1, 0, 0, 0, 5, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 58,
48, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 16, 0, 0, 0, 1, 0, 3, 0, 0, 0, 0, 2, 58,
48, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 1, 0, 58, 48, 0, 0, 1, 0, 0,
0, 0, 0, 0, 0, 16, 0, 0, 0, 3, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 58, 48, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 4, 0, 0,
0, 0, 1, 58, 48, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 16, 0, 0, 0, 4, 0};
auto input_stream =
std::make_shared<ByteArrayInputStream>(index_bytes.data(), index_bytes.size());
BitSliceIndexBitmapFileIndex file_index({});
ASSERT_OK_AND_ASSIGN(
auto reader,
file_index.CreateReader(CreateArrowSchema(arrow::int64()).get(),
/*start=*/0, /*length=*/index_bytes.size(), input_stream, pool_));

// x > INT64_MIN: all non-null rows (writer cannot store INT64_MIN)
CheckResult(reader->VisitGreaterThan(Literal(INT64_MIN)).value(), {1, 3, 4});

// x >= INT64_MIN: all non-null rows
CheckResult(reader->VisitGreaterOrEqual(Literal(INT64_MIN)).value(), {1, 3, 4});

// x < INT64_MIN: empty
CheckResult(reader->VisitLessThan(Literal(INT64_MIN)).value(), {});

// x <= INT64_MIN: empty (no row has INT64_MIN)
CheckResult(reader->VisitLessOrEqual(Literal(INT64_MIN)).value(), {});

// x == INT64_MIN: empty
CheckResult(reader->VisitEqual(Literal(INT64_MIN)).value(), {});

// x != INT64_MIN: all non-null rows
CheckResult(reader->VisitNotEqual(Literal(INT64_MIN)).value(), {1, 3, 4});

// x IN (INT64_MIN, -1): only -1 matches → row 4
CheckResult(reader->VisitIn({Literal(INT64_MIN), Literal(static_cast<int64_t>(-1))}).value(),
{4});

// x NOT IN (INT64_MIN, -1): all non-null except row 4
CheckResult(reader->VisitNotIn({Literal(INT64_MIN), Literal(static_cast<int64_t>(-1))}).value(),
{1, 3});
}

} // namespace paimon::test
Loading