From 6444b00dbe9601fc8dacfffeb9515cfad82f235f Mon Sep 17 00:00:00 2001
From: Radhika Patwari <45709641+rsrkpatwari1234@users.noreply.github.com>
Date: Mon, 16 Feb 2026 02:20:10 +0530
Subject: [PATCH 1/3] Update BaseRecordExtractor.java

---
NOTE(review): recovered from a copy whose line breaks were collapsed and whose
angle-bracket type arguments were dropped. Indentation is inferred (2-space), and
the `Map<?, ?>` wildcards in the added lines are a reconstruction - confirm against
the original commit. Context and removed lines are reproduced exactly as they
appeared in that copy and may also be missing type arguments.

 .../spi/data/readers/BaseRecordExtractor.java | 34 ++++++++++++++++---
 1 file changed, 30 insertions(+), 4 deletions(-)

diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/data/readers/BaseRecordExtractor.java b/pinot-spi/src/main/java/org/apache/pinot/spi/data/readers/BaseRecordExtractor.java
index d2dd78d92068..16f5305e7fd2 100644
--- a/pinot-spi/src/main/java/org/apache/pinot/spi/data/readers/BaseRecordExtractor.java
+++ b/pinot-spi/src/main/java/org/apache/pinot/spi/data/readers/BaseRecordExtractor.java
@@ -91,13 +91,39 @@ protected Map convertRecord(Object value) {
    * @param value should be verified to be a multi-value type prior to calling this method
    */
   protected Object[] convertMultiValue(Object value) {
+    Object[] result;
     if (value instanceof Collection) {
-      return convertCollection((Collection) value);
+      result = convertCollection((Collection) value);
+    } else if (value instanceof Object[]) {
+      result = convertArray((Object[]) value);
+    } else {
+      return convertPrimitiveArray(value);
+    }
+    return unwrapElementMapsInArray(result);
+  }
+
+  /**
+   * Unwraps arrays where each element is a map with a single key "element" (Parquet/Avro list element convention).
+   * E.g. [{"element":"abc"}, {"element":"xyz"}] becomes ["abc", "xyz"].
+   */
+  protected static Object[] unwrapElementMapsInArray(Object[] array) {
+    if (array == null || array.length == 0) {
+      return array;
+    }
+    for (Object o : array) {
+      if (!(o instanceof Map)) {
+        return array;
+      }
+      Map<?, ?> m = (Map<?, ?>) o;
+      if (m.size() != 1 || !m.containsKey("element")) {
+        return array;
+      }
     }
-    if (value instanceof Object[]) {
-      return convertArray((Object[]) value);
+    Object[] unwrapped = new Object[array.length];
+    for (int i = 0; i < array.length; i++) {
+      unwrapped[i] = ((Map<?, ?>) array[i]).get("element");
     }
-    return convertPrimitiveArray(value);
+    return unwrapped;
   }
 
   protected Object[] convertCollection(Collection collection) {

From 412570a04b3c8a94dcfb30085e953980c9902b77 Mon Sep 17 00:00:00 2001
From: Radhika Patwari <45709641+rsrkpatwari1234@users.noreply.github.com>
Date: Mon, 16 Feb 2026 02:24:59 +0530
Subject: [PATCH 2/3] Create BaseRecordExtractorTest.java

---
NOTE(review): `BaseRecordExtractor<Object>`, `Set<String>`, `Map<?, ?>`,
`Map<String, Object>` and the Javadoc `<p>` tag are reconstructions of tokens that
were dropped from the recovered copy; `for (String key : _fields)` cannot compile
against a raw Set. Presumably BaseRecordExtractor is generic in its input type -
confirm against the original commit.

 .../data/readers/BaseRecordExtractorTest.java | 171 ++++++++++++++++++
 1 file changed, 171 insertions(+)
 create mode 100644 pinot-spi/src/test/java/org/apache/pinot/spi/data/readers/BaseRecordExtractorTest.java

diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/data/readers/BaseRecordExtractorTest.java b/pinot-spi/src/test/java/org/apache/pinot/spi/data/readers/BaseRecordExtractorTest.java
new file mode 100644
index 000000000000..ec82183dc08b
--- /dev/null
+++ b/pinot-spi/src/test/java/org/apache/pinot/spi/data/readers/BaseRecordExtractorTest.java
@@ -0,0 +1,171 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.spi.data.readers;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+
+/**
+ * Tests for {@link BaseRecordExtractor}, in particular the unwrapping of Parquet/Avro list "element"
+ * structs (single-key "element" maps) into plain arrays (fix for
+ * Pinot issue #17420).
+ *
+ * <p>No existing tests failed because typical tests use plain arrays (e.g. {@code [10, 20]}) or
+ * primitive arrays; the bug only appears when the source supplies an array of structs with a single
+ * field {@code "element"} (Parquet/Avro list-element convention).
+ */
+public class BaseRecordExtractorTest {
+
+  /** Concrete extractor to exercise protected convertMultiValue and convert(). */
+  private static final class TestExtractor extends BaseRecordExtractor<Object> {
+    private java.util.Set<String> _fields;
+
+    @Override
+    public void init(java.util.Set<String> fields, RecordExtractorConfig recordExtractorConfig) {
+      _fields = fields;
+    }
+
+    @Override
+    public GenericRow extract(Object from, GenericRow to) {
+      if (from instanceof Map && _fields != null) {
+        Map<?, ?> map = (Map<?, ?>) from;
+        for (String key : _fields) {
+          if (map.containsKey(key)) {
+            to.putValue(key, convert(map.get(key)));
+          }
+        }
+      }
+      return to;
+    }
+
+    Object[] callConvertMultiValue(Object value) {
+      return convertMultiValue(value);
+    }
+  }
+
+  @Test
+  public void testUnwrapElementMapsInArray_listOfElementMaps() {
+    TestExtractor extractor = new TestExtractor();
+    Object[] result = extractor.callConvertMultiValue(
+        Arrays.asList(
+            Collections.singletonMap("element", "abc"),
+            Collections.singletonMap("element", "xyz")));
+    Assert.assertNotNull(result);
+    Assert.assertEquals(result.length, 2);
+    Assert.assertEquals(result[0], "abc");
+    Assert.assertEquals(result[1], "xyz");
+  }
+
+  @Test
+  public void testUnwrapElementMapsInArray_objectArrayOfElementMaps() {
+    TestExtractor extractor = new TestExtractor();
+    Object[] input = new Object[] {
+        Collections.singletonMap("element", "abc"),
+        Collections.singletonMap("element", "xyz")
+    };
+    Object[] result = extractor.callConvertMultiValue(input);
+    Assert.assertNotNull(result);
+    Assert.assertEquals(result.length, 2);
+    Assert.assertEquals(result[0], "abc");
+    Assert.assertEquals(result[1], "xyz");
+  }
+
+  @Test
+  public void testNoUnwrap_whenNotAllMaps() {
+    TestExtractor extractor = new TestExtractor();
+    Object[] result = extractor.callConvertMultiValue(
+        Arrays.asList(
+            Collections.singletonMap("element", "abc"),
+            "plainString"));
+    Assert.assertNotNull(result);
+    Assert.assertEquals(result.length, 2);
+    Assert.assertEquals(result[0], Collections.singletonMap("element", "abc"));
+    Assert.assertEquals(result[1], "plainString");
+  }
+
+  @Test
+  public void testNoUnwrap_whenMapHasMultipleKeys() {
+    TestExtractor extractor = new TestExtractor();
+    Map<String, Object> twoKeys = new HashMap<>();
+    twoKeys.put("element", "abc");
+    twoKeys.put("other", 1);
+    Object[] result = extractor.callConvertMultiValue(
+        Arrays.asList(
+            Collections.singletonMap("element", "abc"),
+            twoKeys));
+    Assert.assertNotNull(result);
+    Assert.assertEquals(result.length, 2);
+    // When any element is not a single-key "element" map, array is returned unchanged (no unwrap)
+    Assert.assertEquals(result[0], Collections.singletonMap("element", "abc"));
+    Assert.assertEquals(result[1], twoKeys);
+  }
+
+  @Test
+  public void testNoUnwrap_whenMapHasDifferentSingleKey() {
+    TestExtractor extractor = new TestExtractor();
+    Object[] result = extractor.callConvertMultiValue(
+        Collections.singletonList(Collections.singletonMap("not_element", "v")));
+    Assert.assertNotNull(result);
+    Assert.assertEquals(result.length, 1);
+    Assert.assertEquals(result[0], Collections.singletonMap("not_element", "v"));
+  }
+
+  @Test
+  public void testPrimitiveArray_notUnwrapped() {
+    TestExtractor extractor = new TestExtractor();
+    int[] primitive = new int[] { 10, 20 };
+    Object[] result = extractor.callConvertMultiValue(primitive);
+    Assert.assertNotNull(result);
+    Assert.assertEquals(result.length, 2);
+    Assert.assertEquals(result[0], 10);
+    Assert.assertEquals(result[1], 20);
+  }
+
+  @Test
+  public void testEmptyList_returnsEmptyArray() {
+    TestExtractor extractor = new TestExtractor();
+    Object[] result = extractor.callConvertMultiValue(Collections.emptyList());
+    Assert.assertNotNull(result);
+    Assert.assertEquals(result.length, 0);
+  }
+
+  @Test
+  public void testConvert_producesUnwrappedMultiValue() {
+    TestExtractor extractor = new TestExtractor();
+    extractor.init(new HashSet<>(Collections.singletonList("tags")), null);
+    GenericRow row = new GenericRow();
+    Object record = Collections.singletonMap("tags",
+        Arrays.asList(
+            Collections.singletonMap("element", "abc"),
+            Collections.singletonMap("element", "xyz")));
+    extractor.extract(record, row);
+    Object value = row.getValue("tags");
+    Assert.assertTrue(value instanceof Object[]);
+    Object[] arr = (Object[]) value;
+    Assert.assertEquals(arr.length, 2);
+    Assert.assertEquals(arr[0], "abc");
+    Assert.assertEquals(arr[1], "xyz");
+  }
+}

From 42bd82539c054da23d5e5ea69c7482b11893e280 Mon Sep 17 00:00:00 2001
From: Radhika Patwari <45709641+rsrkpatwari1234@users.noreply.github.com>
Date: Mon, 16 Feb 2026 02:37:25 +0530
Subject: [PATCH 3/3] Update BaseRecordExtractorTest.java

---
 .../data/readers/BaseRecordExtractorTest.java | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/pinot-spi/src/test/java/org/apache/pinot/spi/data/readers/BaseRecordExtractorTest.java b/pinot-spi/src/test/java/org/apache/pinot/spi/data/readers/BaseRecordExtractorTest.java
index ec82183dc08b..adaa7ec90c76 100644
--- a/pinot-spi/src/test/java/org/apache/pinot/spi/data/readers/BaseRecordExtractorTest.java
+++ b/pinot-spi/src/test/java/org/apache/pinot/spi/data/readers/BaseRecordExtractorTest.java
@@ -66,7 +66,7 @@ Object[] callConvertMultiValue(Object value) {
   }
 
   @Test
-  public void testUnwrapElementMapsInArray_listOfElementMaps() {
+  public void testUnwrapElementMapsInArrayListOfElementMaps() {
     TestExtractor extractor = new TestExtractor();
     Object[] result = extractor.callConvertMultiValue(
         Arrays.asList(
@@ -79,7 +79,7 @@ public void testUnwrapElementMapsInArray_listOfElementMaps() {
   }
 
   @Test
-  public void testUnwrapElementMapsInArray_objectArrayOfElementMaps() {
+  public void testUnwrapElementMapsInArrayObjectArrayOfElementMaps() {
     TestExtractor extractor = new TestExtractor();
     Object[] input = new Object[] {
         Collections.singletonMap("element", "abc"),
@@ -93,7 +93,7 @@ public void testUnwrapElementMapsInArray_objectArrayOfElementMaps() {
   }
 
   @Test
-  public void testNoUnwrap_whenNotAllMaps() {
+  public void testNoUnwrapWhenNotAllMaps() {
     TestExtractor extractor = new TestExtractor();
     Object[] result = extractor.callConvertMultiValue(
         Arrays.asList(
@@ -106,7 +106,7 @@ public void testNoUnwrap_whenNotAllMaps() {
   }
 
   @Test
-  public void testNoUnwrap_whenMapHasMultipleKeys() {
+  public void testNoUnwrapWhenMapHasMultipleKeys() {
     TestExtractor extractor = new TestExtractor();
     Map<String, Object> twoKeys = new HashMap<>();
     twoKeys.put("element", "abc");
@@ -123,7 +123,7 @@ public void testNoUnwrap_whenMapHasMultipleKeys() {
   }
 
   @Test
-  public void testNoUnwrap_whenMapHasDifferentSingleKey() {
+  public void testNoUnwrapWhenMapHasDifferentSingleKey() {
     TestExtractor extractor = new TestExtractor();
     Object[] result = extractor.callConvertMultiValue(
         Collections.singletonList(Collections.singletonMap("not_element", "v")));
@@ -133,7 +133,7 @@ public void testNoUnwrap_whenMapHasDifferentSingleKey() {
   }
 
   @Test
-  public void testPrimitiveArray_notUnwrapped() {
+  public void testPrimitiveArrayNotUnwrapped() {
     TestExtractor extractor = new TestExtractor();
     int[] primitive = new int[] { 10, 20 };
     Object[] result = extractor.callConvertMultiValue(primitive);
@@ -144,7 +144,7 @@ public void testPrimitiveArray_notUnwrapped() {
   }
 
   @Test
-  public void testEmptyList_returnsEmptyArray() {
+  public void testEmptyListReturnsEmptyArray() {
     TestExtractor extractor = new TestExtractor();
     Object[] result = extractor.callConvertMultiValue(Collections.emptyList());
     Assert.assertNotNull(result);
@@ -152,7 +152,7 @@ public void testEmptyList_returnsEmptyArray() {
   }
 
   @Test
-  public void testConvert_producesUnwrappedMultiValue() {
+  public void testConvertProducesUnwrappedMultiValue() {
     TestExtractor extractor = new TestExtractor();
     extractor.init(new HashSet<>(Collections.singletonList("tags")), null);
     GenericRow row = new GenericRow();