 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 import java.util.stream.StreamSupport;
-import org.apache.arrow.vector.NullCheckingForGet;
 import org.apache.arrow.vector.VectorSchemaRoot;
 import org.apache.arrow.vector.types.Types.MinorType;
 import org.apache.iceberg.CombinedScanTask;
 import org.apache.iceberg.encryption.EncryptedFiles;
 import org.apache.iceberg.encryption.EncryptedInputFile;
 import org.apache.iceberg.encryption.EncryptionManager;
+import org.apache.iceberg.formats.FormatModelRegistry;
+import org.apache.iceberg.formats.ReadBuilder;
 import org.apache.iceberg.io.CloseableGroup;
 import org.apache.iceberg.io.CloseableIterable;
 import org.apache.iceberg.io.CloseableIterator;
 import org.apache.iceberg.io.FileIO;
 import org.apache.iceberg.io.InputFile;
 import org.apache.iceberg.mapping.NameMappingParser;
-import org.apache.iceberg.parquet.Parquet;
 import org.apache.iceberg.parquet.TypeWithSchemaVisitor;
 import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
 import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
@@ -189,8 +189,7 @@ public void close() throws IOException {
    * Reads the data file and returns an iterator of {@link VectorSchemaRoot}. Only Parquet data file
    * format is supported.
    */
-  private static final class VectorizedCombinedScanIterator
-      implements CloseableIterator<ColumnarBatch> {
+  static final class VectorizedCombinedScanIterator implements CloseableIterator<ColumnarBatch> {

     private final Iterator<FileScanTask> fileItr;
     private final Map<String, InputFile> inputFiles;
@@ -324,19 +323,8 @@ CloseableIterator<ColumnarBatch> open(FileScanTask task) {
       InputFile location = getInputFile(task);
       Preconditions.checkNotNull(location, "Could not find InputFile associated with FileScanTask");
       if (task.file().format() == FileFormat.PARQUET) {
-        Parquet.ReadBuilder builder =
-            Parquet.read(location)
-                .project(expectedSchema)
-                .split(task.start(), task.length())
-                .createBatchedReaderFunc(
-                    fileSchema ->
-                        buildReader(
-                            expectedSchema,
-                            fileSchema, /* setArrowValidityVector */
-                            NullCheckingForGet.NULL_CHECKING_ENABLED))
-                .recordsPerBatch(batchSize)
-                .filter(task.residual())
-                .caseSensitive(caseSensitive);
+        ReadBuilder<ColumnarBatch, ?> builder =
+            FormatModelRegistry.readBuilder(FileFormat.PARQUET, ColumnarBatch.class, location);

         if (reuseContainers) {
           builder.reuseContainers();
@@ -345,7 +333,14 @@ CloseableIterator<ColumnarBatch> open(FileScanTask task) {
           builder.withNameMapping(NameMappingParser.fromJson(nameMapping));
         }

-        iter = builder.build();
+        iter =
+            builder
+                .project(expectedSchema)
+                .split(task.start(), task.length())
+                .recordsPerBatch(batchSize)
+                .caseSensitive(caseSensitive)
+                .filter(task.residual())
+                .build();
       } else {
         throw new UnsupportedOperationException(
             "Format: " + task.file().format() + " not supported for batched reads");
@@ -376,7 +371,7 @@ private InputFile getInputFile(FileScanTask task) {
      * @param fileSchema Schema of the data file.
      * @param setArrowValidityVector Indicates whether to set the validity vector in Arrow vectors.
      */
-    private static ArrowBatchReader buildReader(
+    static ArrowBatchReader buildReader(
         Schema expectedSchema, MessageType fileSchema, boolean setArrowValidityVector) {
       return (ArrowBatchReader)
           TypeWithSchemaVisitor.visit(
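
Taken together, the two open() hunks replace the direct Parquet.read(location) chain with a format-agnostic builder obtained from FormatModelRegistry. A minimal sketch of the resulting read path, consolidating only what the hunks above show and assuming the existing fields of the class (expectedSchema, batchSize, caseSensitive, reuseContainers, nameMapping, iter); the null guard around withNameMapping is an assumption, since its condition is not visible in the diff:

    // Sketch only: consolidates the hunks above; the nameMapping null check is assumed.
    ReadBuilder<ColumnarBatch, ?> builder =
        FormatModelRegistry.readBuilder(FileFormat.PARQUET, ColumnarBatch.class, location);

    if (reuseContainers) {
      builder.reuseContainers();
    }

    if (nameMapping != null) { // assumed guard around the existing withNameMapping call
      builder.withNameMapping(NameMappingParser.fromJson(nameMapping));
    }

    iter =
        builder
            .project(expectedSchema)
            .split(task.start(), task.length())
            .recordsPerBatch(batchSize)
            .caseSensitive(caseSensitive)
            .filter(task.residual())
            .build();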