From b04380dbbc37a3266366c02e9d4efcdfe5099d6a Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Tue, 31 Mar 2026 03:42:20 +0800 Subject: [PATCH 1/2] GH-1098: [Java][Vector] Fix always-true precondition check in LargeListVector.setValueCount --- .../arrow/vector/complex/LargeListVector.java | 2 +- .../arrow/vector/TestLargeListVector.java | 87 +++++++++++++++++++ 2 files changed, 88 insertions(+), 1 deletion(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index 92dd3eaef..7b15b3774 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -1041,7 +1041,7 @@ public void setValueCount(int valueCount) { * TODO: revisit when 64-bit vectors are supported */ Preconditions.checkArgument( - childValueCount <= Integer.MAX_VALUE || childValueCount >= Integer.MIN_VALUE, + childValueCount <= Integer.MAX_VALUE && childValueCount >= 0, "LargeListVector doesn't yet support 64-bit allocations: %s", childValueCount); vector.setValueCount((int) childValueCount); diff --git a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java index bf9bba9c7..5d48315b1 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java @@ -21,6 +21,7 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNull; import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; @@ -1127,4 +1128,90 @@ private void writeIntValues(UnionLargeListWriter writer, int[] values) { } writer.endList(); } + + @Test + public void testSetValueCountRejectsNegativeChildValueCount() { + try (final LargeListVector vector = LargeListVector.empty("list", allocator)) { + vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + vector.allocateNew(); + + // Write a negative value into the offset buffer to simulate a corrupted offset. + // The Preconditions check should reject any negative childValueCount. + vector.getOffsetBuffer().setLong(LargeListVector.OFFSET_WIDTH, -1L); + vector.setLastSet(0); + + assertThrows( + IllegalArgumentException.class, + () -> vector.setValueCount(1), + "setValueCount should reject negative childValueCount"); + } + } + + @Test + public void testSetValueCountRejectsOverflowChildValueCount() { + try (final LargeListVector vector = LargeListVector.empty("list", allocator)) { + vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + vector.allocateNew(); + + // Write a value exceeding Integer.MAX_VALUE into the offset buffer + vector.getOffsetBuffer().setLong(LargeListVector.OFFSET_WIDTH, (long) Integer.MAX_VALUE + 1L); + vector.setLastSet(0); + + assertThrows( + IllegalArgumentException.class, + () -> vector.setValueCount(1), + "setValueCount should reject childValueCount exceeding Integer.MAX_VALUE"); + } + } + + @Test + public void testSetValueCountAcceptsZeroChildValueCount() { + try (final LargeListVector vector = LargeListVector.empty("list", allocator)) { + vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + vector.allocateNew(); + + // childValueCount = 0 when valueCount = 0 (no elements) + vector.setValueCount(0); + assertEquals(0, vector.getValueCount()); + } + } + + @Test + public void testSetValueCountAcceptsValidChildValueCount() { + try (final LargeListVector vector = LargeListVector.empty("list", allocator)) { + vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + vector.allocateNew(); + + // Write a valid childValueCount (5) into the offset buffer + vector.getOffsetBuffer().setLong(LargeListVector.OFFSET_WIDTH, 5L); + vector.setLastSet(0); + vector.setValueCount(1); + assertEquals(1, vector.getValueCount()); + } + } + + @Test + public void testSetValueCountAcceptsMaxIntChildValueCount() { + try (final LargeListVector vector = LargeListVector.empty("list", allocator)) { + vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + vector.allocateNew(); + + // Write Integer.MAX_VALUE into the offset buffer to verify the upper boundary is inclusive. + // The Preconditions check should accept this value (not throw IllegalArgumentException). + // The child vector may throw OversizedAllocationException due to memory limits, + // which is expected and unrelated to the precondition validation. + vector.getOffsetBuffer().setLong(LargeListVector.OFFSET_WIDTH, (long) Integer.MAX_VALUE); + vector.setLastSet(0); + try { + vector.setValueCount(1); + } catch (IllegalArgumentException e) { + throw new AssertionError( + "setValueCount should not reject childValueCount = Integer.MAX_VALUE", e); + } catch (Exception e) { + // OversizedAllocationException or other allocation errors are expected + // when trying to allocate Integer.MAX_VALUE elements — this is fine, + // the precondition check itself passed. + } + } + } } From 1717f2e77e9f37490aebee5b730a7c2dd082495a Mon Sep 17 00:00:00 2001 From: yangjie01 Date: Fri, 3 Apr 2026 23:47:34 +0800 Subject: [PATCH 2/2] adderss comments --- .../apache/arrow/vector/complex/LargeListVector.java | 4 ++-- .../org/apache/arrow/vector/TestLargeListVector.java | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java index 7b15b3774..9368b5ce3 100644 --- a/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java +++ b/vector/src/main/java/org/apache/arrow/vector/complex/LargeListVector.java @@ -1041,8 +1041,8 @@ public void setValueCount(int valueCount) { * TODO: revisit when 64-bit vectors are supported */ Preconditions.checkArgument( - childValueCount <= Integer.MAX_VALUE && childValueCount >= 0, - "LargeListVector doesn't yet support 64-bit allocations: %s", + childValueCount >= 0 && childValueCount <= Integer.MAX_VALUE, + "LargeListVector childValueCount must be in [0, Integer.MAX_VALUE] but was: %s", childValueCount); vector.setValueCount((int) childValueCount); } diff --git a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java index 5d48315b1..b6823fbf3 100644 --- a/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java +++ b/vector/src/test/java/org/apache/arrow/vector/TestLargeListVector.java @@ -23,6 +23,7 @@ import static org.junit.jupiter.api.Assertions.assertSame; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; import java.util.ArrayList; import java.util.Arrays; @@ -30,6 +31,7 @@ import java.util.UUID; import org.apache.arrow.memory.ArrowBuf; import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.OutOfMemoryException; import org.apache.arrow.vector.complex.BaseRepeatedValueVector; import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; @@ -43,6 +45,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.OversizedAllocationException; import org.apache.arrow.vector.util.TransferPair; import org.apache.arrow.vector.util.UuidUtility; import org.junit.jupiter.api.AfterEach; @@ -1200,14 +1203,13 @@ public void testSetValueCountAcceptsMaxIntChildValueCount() { // The Preconditions check should accept this value (not throw IllegalArgumentException). // The child vector may throw OversizedAllocationException due to memory limits, // which is expected and unrelated to the precondition validation. - vector.getOffsetBuffer().setLong(LargeListVector.OFFSET_WIDTH, (long) Integer.MAX_VALUE); + vector.getOffsetBuffer().setLong(LargeListVector.OFFSET_WIDTH, Integer.MAX_VALUE); vector.setLastSet(0); try { vector.setValueCount(1); } catch (IllegalArgumentException e) { - throw new AssertionError( - "setValueCount should not reject childValueCount = Integer.MAX_VALUE", e); - } catch (Exception e) { + fail("setValueCount should not reject childValueCount = Integer.MAX_VALUE", e); + } catch (OversizedAllocationException | OutOfMemoryException e) { // OversizedAllocationException or other allocation errors are expected // when trying to allocate Integer.MAX_VALUE elements — this is fine, // the precondition check itself passed.