From 48bd8d900c14fb7f98f085143a446f1779b19c61 Mon Sep 17 00:00:00 2001 From: lxy264173 Date: Sun, 7 Jun 2026 12:13:35 +0800 Subject: [PATCH 1/2] [core] Fix BSI reader predicate pruning for Long.MIN_VALUE boundary --- .../bsi/BitSliceIndexBitmapFileIndex.java | 32 ++++++-- .../bsi/BitSliceIndexBitmapFileIndexTest.java | 81 +++++++++++++++++++ 2 files changed, 107 insertions(+), 6 deletions(-) diff --git a/paimon-common/src/main/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndex.java b/paimon-common/src/main/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndex.java index 680008a736f5..92f47535d6a0 100644 --- a/paimon-common/src/main/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndex.java +++ b/paimon-common/src/main/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndex.java @@ -240,7 +240,11 @@ public FileIndexResult visitIn(FieldRef fieldRef, List literals) { .map(valueMapper) .map( value -> { - if (value < 0) { + if (value == Long.MIN_VALUE) { + // Writer cannot store Long.MIN_VALUE, so no + // row can match it + return new RoaringBitmap32(); + } else if (value < 0) { return negative.eq(Math.abs(value)); } else { return positive.eq(value); @@ -262,7 +266,9 @@ public FileIndexResult visitNotIn(FieldRef fieldRef, List literals) { .map(valueMapper) .map( value -> { - if (value < 0) { + if (value == Long.MIN_VALUE) { + return new RoaringBitmap32(); + } else if (value < 0) { return negative.eq(Math.abs(value)); } else { return positive.eq(value); @@ -280,7 +286,10 @@ public FileIndexResult visitLessThan(FieldRef fieldRef, Object literal) { return new BitmapIndexResult( () -> { Long value = valueMapper.apply(literal); - if (value < 0) { + if (value == Long.MIN_VALUE) { + // Nothing is less than Long.MIN_VALUE + return new RoaringBitmap32(); + } else if (value < 0) { return negative.gt(Math.abs(value)); } else { return RoaringBitmap32.or(positive.lt(value), negative.isNotNull()); @@ -293,7 +302,10 @@ public BitmapIndexResult visitLessOrEqual(FieldRef fieldRef, Object literal) { return new BitmapIndexResult( () -> { Long value = valueMapper.apply(literal); - if (value < 0) { + if (value == Long.MIN_VALUE) { + // Writer cannot store Long.MIN_VALUE, so no row can match + return new RoaringBitmap32(); + } else if (value < 0) { return negative.gte(Math.abs(value)); } else { return RoaringBitmap32.or(positive.lte(value), negative.isNotNull()); @@ -306,7 +318,11 @@ public FileIndexResult visitGreaterThan(FieldRef fieldRef, Object literal) { return new BitmapIndexResult( () -> { Long value = valueMapper.apply(literal); - if (value < 0) { + if (value == Long.MIN_VALUE) { + // Everything is greater than Long.MIN_VALUE (writer cannot store it) + return RoaringBitmap32.or( + positive.isNotNull(), negative.isNotNull()); + } else if (value < 0) { return RoaringBitmap32.or( positive.isNotNull(), negative.lt(Math.abs(value))); } else { @@ -320,7 +336,11 @@ public BitmapIndexResult visitGreaterOrEqual(FieldRef fieldRef, Object literal) return new BitmapIndexResult( () -> { Long value = valueMapper.apply(literal); - if (value < 0) { + if (value == Long.MIN_VALUE) { + // All non-null rows satisfy x >= Long.MIN_VALUE + return RoaringBitmap32.or( + positive.isNotNull(), negative.isNotNull()); + } else if (value < 0) { return RoaringBitmap32.or( positive.isNotNull(), negative.lte(Math.abs(value))); } else { diff --git a/paimon-common/src/test/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndexTest.java b/paimon-common/src/test/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndexTest.java index b55e2e77e150..ad60831ea265 100644 --- a/paimon-common/src/test/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndexTest.java +++ b/paimon-common/src/test/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndexTest.java @@ -23,6 +23,7 @@ import org.apache.paimon.fileindex.bitmap.BitmapIndexResult; import org.apache.paimon.fs.ByteArraySeekableStream; import org.apache.paimon.predicate.FieldRef; +import org.apache.paimon.types.BigIntType; import org.apache.paimon.types.IntType; import org.apache.paimon.utils.RoaringBitmap32; @@ -31,6 +32,7 @@ import java.util.Arrays; import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; /** test for {@link BitSliceIndexBitmapFileIndex}. */ public class BitSliceIndexBitmapFileIndexTest { @@ -250,4 +252,83 @@ public void testBitSliceIndexNegativeOnly() { assertThat(((BitmapIndexResult) reader.visitGreaterOrEqual(fieldRef, 1)).get()) .isEqualTo(RoaringBitmap32.bitmapOf()); } + + @Test + public void testReaderPredicatePruningWithLongMinValue() { + BigIntType bigIntType = new BigIntType(); + FieldRef fieldRef = new FieldRef(0, "", bigIntType); + BitSliceIndexBitmapFileIndex bsiFileIndex = new BitSliceIndexBitmapFileIndex(bigIntType); + FileIndexWriter writer = bsiFileIndex.createWriter(); + + // Use values that include negative numbers but NOT Long.MIN_VALUE itself + // (since the writer cannot handle it). This isolates the reader-side bug. + // Data: [-100, -1, null, 0, 1, 50] + Object[] arr = {-100L, -1L, null, 0L, 1L, 50L}; + + for (Object o : arr) { + writer.write(o); + } + byte[] bytes = writer.serializedBytes(); + ByteArraySeekableStream stream = new ByteArraySeekableStream(bytes); + FileIndexReader reader = bsiFileIndex.createReader(stream, 0, bytes.length); + + // All non-null row ids: {0, 1, 3, 4, 5} + + // x > Long.MIN_VALUE: every int64 value > Long.MIN_VALUE (since no row IS Long.MIN_VALUE), + // so result should be ALL non-null rows = {0, 1, 3, 4, 5} + RoaringBitmap32 gtResult = + ((BitmapIndexResult) reader.visitGreaterThan(fieldRef, Long.MIN_VALUE)).get(); + assertThat(gtResult) + .as("x > Long.MIN_VALUE should return all non-null rows") + .isEqualTo(RoaringBitmap32.bitmapOf(0, 1, 3, 4, 5)); + + // x >= Long.MIN_VALUE: same — all non-null rows satisfy this + RoaringBitmap32 gteResult = + ((BitmapIndexResult) reader.visitGreaterOrEqual(fieldRef, Long.MIN_VALUE)).get(); + assertThat(gteResult) + .as("x >= Long.MIN_VALUE should return all non-null rows") + .isEqualTo(RoaringBitmap32.bitmapOf(0, 1, 3, 4, 5)); + + // x < Long.MIN_VALUE: no int64 value is less than Long.MIN_VALUE, so result should be + // empty + RoaringBitmap32 ltResult = + ((BitmapIndexResult) reader.visitLessThan(fieldRef, Long.MIN_VALUE)).get(); + assertThat(ltResult) + .as("x < Long.MIN_VALUE should return empty") + .isEqualTo(RoaringBitmap32.bitmapOf()); + + // x <= Long.MIN_VALUE: no row has Long.MIN_VALUE, so result should be empty + RoaringBitmap32 lteResult = + ((BitmapIndexResult) reader.visitLessOrEqual(fieldRef, Long.MIN_VALUE)).get(); + assertThat(lteResult) + .as("x <= Long.MIN_VALUE should return empty (no row has that value)") + .isEqualTo(RoaringBitmap32.bitmapOf()); + + // x == Long.MIN_VALUE: no row has Long.MIN_VALUE, so result should be empty + RoaringBitmap32 eqResult = + ((BitmapIndexResult) reader.visitEqual(fieldRef, Long.MIN_VALUE)).get(); + assertThat(eqResult) + .as("x == Long.MIN_VALUE should return empty") + .isEqualTo(RoaringBitmap32.bitmapOf()); + + // x != Long.MIN_VALUE: all non-null rows (no row has Long.MIN_VALUE) + RoaringBitmap32 neqResult = + ((BitmapIndexResult) reader.visitNotEqual(fieldRef, Long.MIN_VALUE)).get(); + assertThat(neqResult) + .as("x != Long.MIN_VALUE should return all non-null rows") + .isEqualTo(RoaringBitmap32.bitmapOf(0, 1, 3, 4, 5)); + } + + @Test + public void testWriterCannotHandleLongMinValue() { + BigIntType bigIntType = new BigIntType(); + BitSliceIndexBitmapFileIndex bsiFileIndex = new BitSliceIndexBitmapFileIndex(bigIntType); + FileIndexWriter writer = bsiFileIndex.createWriter(); + writer.write(Long.MIN_VALUE); + + assertThatThrownBy(writer::serializedBytes) + .isInstanceOf(RuntimeException.class) + .hasCauseInstanceOf(IllegalArgumentException.class) + .hasRootCauseMessage("values should be non-negative"); + } } From bb7e1c119ea83586d725443d888059b53cd78727 Mon Sep 17 00:00:00 2001 From: lxy264173 Date: Sun, 7 Jun 2026 12:20:30 +0800 Subject: [PATCH 2/2] apply spotless --- .../paimon/fileindex/bsi/BitSliceIndexBitmapFileIndex.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/paimon-common/src/main/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndex.java b/paimon-common/src/main/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndex.java index 92f47535d6a0..f9f0f95cecf7 100644 --- a/paimon-common/src/main/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndex.java +++ b/paimon-common/src/main/java/org/apache/paimon/fileindex/bsi/BitSliceIndexBitmapFileIndex.java @@ -320,8 +320,7 @@ public FileIndexResult visitGreaterThan(FieldRef fieldRef, Object literal) { Long value = valueMapper.apply(literal); if (value == Long.MIN_VALUE) { // Everything is greater than Long.MIN_VALUE (writer cannot store it) - return RoaringBitmap32.or( - positive.isNotNull(), negative.isNotNull()); + return RoaringBitmap32.or(positive.isNotNull(), negative.isNotNull()); } else if (value < 0) { return RoaringBitmap32.or( positive.isNotNull(), negative.lt(Math.abs(value))); @@ -338,8 +337,7 @@ public BitmapIndexResult visitGreaterOrEqual(FieldRef fieldRef, Object literal) Long value = valueMapper.apply(literal); if (value == Long.MIN_VALUE) { // All non-null rows satisfy x >= Long.MIN_VALUE - return RoaringBitmap32.or( - positive.isNotNull(), negative.isNotNull()); + return RoaringBitmap32.or(positive.isNotNull(), negative.isNotNull()); } else if (value < 0) { return RoaringBitmap32.or( positive.isNotNull(), negative.lte(Math.abs(value)));