apache · vigneshsiva11 · Feb 6, 2026 · Feb 6, 2026 · Feb 14, 2026 · Feb 14, 2026
diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs
@@ -1370,8 +1370,6 @@ impl ParquetRecordBatchReader {
         }
         match self.read_plan.row_selection_cursor_mut() {
             RowSelectionCursor::Mask(mask_cursor) => {
-                // Stream the record batch reader using contiguous segments of the selection
-                // mask, avoiding the need to materialize intermediate `RowSelector` ranges.
                 while !mask_cursor.is_empty() {
                     let Some(mask_chunk) = mask_cursor.next_mask_chunk(batch_size) else {
                         return Ok(None);
@@ -1395,43 +1393,31 @@ impl ParquetRecordBatchReader {
                         continue;
                     }
 
-                    let mask = mask_cursor.mask_values_for(&mask_chunk)?;
-
                     let read = self.array_reader.read_records(mask_chunk.chunk_rows)?;
                     if read == 0 {
                         return Err(general_err!(
                             "reached end of column while expecting {} rows",
                             mask_chunk.chunk_rows
                         ));
                     }
-                    if read != mask_chunk.chunk_rows {
-                        return Err(general_err!(
-                            "insufficient rows read from array reader - expected {}, got {}",
-                            mask_chunk.chunk_rows,
-                            read
-                        ));
-                    }
 
                     let array = self.array_reader.consume_batch()?;
-                    // The column reader exposes the projection as a struct array; convert this
-                    // into a record batch before applying the boolean filter mask.
                     let struct_array = array.as_struct_opt().ok_or_else(|| {
                         ArrowError::ParquetError(
                             "Struct array reader should return struct array".to_string(),
                         )
                     })?;
 
+                    // Key Change: partial read → emit immediately, no mask
+                    if read < mask_chunk.chunk_rows {
+                        return Ok(Some(RecordBatch::from(struct_array)));
+                    }
+
+                    // Full read , safe to apply mask
-                    // Full read , safe to apply mask
+                    // Full read, safe to apply mask
-                    // Key Change: partial read → emit immediately, no mask
-                    if read < mask_chunk.chunk_rows {
-                        return Ok(Some(RecordBatch::from(struct_array)));
-                    }
-
-                    // Full read , safe to apply mask
+                    // The column reader exposes the projected columns as a single StructArray.
+                    // Convert this struct array into a RecordBatch before applying the boolean
+                    // filter mask so we can use the standard RecordBatch-level filter utilities.
+                    // Key Change: partial read → emit immediately, no mask
+                    if read < mask_chunk.chunk_rows {
+                        return Ok(Some(RecordBatch::from(struct_array)));
+                    }
+
+                    // Full read, safe to apply mask
-                    // Key Change: partial read → emit immediately, no mask
-                    if read < mask_chunk.chunk_rows {
-                        return Ok(Some(RecordBatch::from(struct_array)));
-                    }
-
-                    // Full read , safe to apply mask
+                    // Always apply the selection mask; for partial reads, slice it down to `read`
+                    if read < mask_chunk.chunk_rows {
+                        let full_mask = mask_cursor.mask_values_for(&mask_chunk)?;
+                        let sliced_mask = full_mask.slice(0, read);
+                        let sliced_mask = sliced_mask.as_boolean();
+
+                        let filtered_batch =
+                            filter_record_batch(&RecordBatch::from(struct_array), sliced_mask)?;
+
+                        if filtered_batch.num_rows() == 0 {
+                            continue;
+                        }
+
+                        return Ok(Some(filtered_batch));
+                    }
+
+                    // Full read, apply mask directly
-                    // Full read , safe to apply mask
+                    // Full read, safe to apply mask
-                    // Key Change: partial read → emit immediately, no mask
-                    if read < mask_chunk.chunk_rows {
-                        return Ok(Some(RecordBatch::from(struct_array)));
-                    }
-
-                    // Full read , safe to apply mask
+                    // The column reader exposes the projected columns as a single StructArray.
+                    // Convert this struct array into a RecordBatch before applying the boolean
+                    // filter mask so we can use the standard RecordBatch-level filter utilities.
+                    // Key Change: partial read → emit immediately, no mask
+                    if read < mask_chunk.chunk_rows {
+                        return Ok(Some(RecordBatch::from(struct_array)));
+                    }
+
+                    // Full read, safe to apply mask
-                    // Key Change: partial read → emit immediately, no mask
-                    if read < mask_chunk.chunk_rows {
-                        return Ok(Some(RecordBatch::from(struct_array)));
-                    }
-
-                    // Full read , safe to apply mask
+                    // Always apply the selection mask; for partial reads, slice it down to `read`
+                    if read < mask_chunk.chunk_rows {
+                        let full_mask = mask_cursor.mask_values_for(&mask_chunk)?;
+                        let sliced_mask = full_mask.slice(0, read);
+                        let sliced_mask = sliced_mask.as_boolean();
+
+                        let filtered_batch =
+                            filter_record_batch(&RecordBatch::from(struct_array), sliced_mask)?;
+
+                        if filtered_batch.num_rows() == 0 {
+                            continue;
+                        }
+
+                        return Ok(Some(filtered_batch));
+                    }
+
+                    // Full read, apply mask directly
+                    let mask = mask_cursor.mask_values_for(&mask_chunk)?;
                     let filtered_batch =
                         filter_record_batch(&RecordBatch::from(struct_array), &mask)?;
 
-
+
+                    // For full reads, ensure the mask and filtered output are consistent
+                    if filtered_batch.num_rows() != mask_chunk.selected_rows {
+                        return Err(general_err!(
+                            "row filter inconsistency: expected {} rows, got {}",
+                            mask_chunk.selected_rows,
+                            filtered_batch.num_rows()
+                        ));
+                    }
-
+
+                    // For full reads, ensure the mask and filtered output are consistent
+                    if filtered_batch.num_rows() != mask_chunk.selected_rows {
+                        return Err(general_err!(
+                            "row filter inconsistency: expected {} rows, got {}",
+                            mask_chunk.selected_rows,
+                            filtered_batch.num_rows()
+                        ));
+                    }
-                    if filtered_batch.num_rows() != mask_chunk.selected_rows {
-                        return Err(general_err!(
-                            "filtered rows mismatch selection - expected {}, got {}",
-                            mask_chunk.selected_rows,
-                            filtered_batch.num_rows()
-                        ));
-                    }
-
                     if filtered_batch.num_rows() == 0 {
                         continue;
                     }
@@ -1472,14 +1458,24 @@ impl ParquetRecordBatchReader {
                         }
                         _ => front.row_count,
                     };
-                    match self.array_reader.read_records(to_read)? {
-                        0 => break,
-                        rec => read_records += rec,
-                    };
+                    let rec = self.array_reader.read_records(to_read)?;
+                    if rec == 0 {
+                        break;
+                    }
+
+                    read_records += rec;
+
+                    // stop early if we couldn't read everything requested
+                    if rec < to_read {
-                    if rec < to_read {
+                    if rec < to_read {
+                        let unconsumed = to_read - rec;
+                        if unconsumed > 0 {
+                            selectors_cursor.return_selector(RowSelector::select(unconsumed));
+                        }
-                    if rec < to_read {
+                    if rec < to_read {
+                        let unconsumed = to_read - rec;
+                        if unconsumed > 0 {
+                            selectors_cursor.return_selector(RowSelector::select(unconsumed));
+                        }
+                        break;
+                    }
                 }
             }
             RowSelectionCursor::All => {
-                self.array_reader.read_records(batch_size)?;
+                let rec = self.array_reader.read_records(batch_size)?;
+                if rec == 0 {
+                    return Ok(None);
-                    return Ok(None);
+                    break;
-                    return Ok(None);
+                    break;
+                }
             }
         };
 

diff --git a/parquet/tests/arrow_reader/checksum.rs b/parquet/tests/arrow_reader/checksum.rs
@@ -25,13 +25,15 @@ use parquet::arrow::arrow_reader::ArrowReaderBuilder;
 #[test]
 fn test_datapage_v1_corrupt_checksum() {
     let errors = read_file_batch_errors("datapage_v1-corrupt-checksum.parquet");
-    assert_eq!(errors, [
-        Err("Parquet argument error: Parquet error: Page CRC checksum mismatch".to_string()),
-        Ok(()),
-        Ok(()),
-        Err("Parquet argument error: Parquet error: Page CRC checksum mismatch".to_string()),
-        Err("Parquet argument error: Parquet error: Not all children array length are the same!".to_string())
-    ]);
+    assert_eq!(
+        errors,
+        [
+            Err("Parquet argument error: Parquet error: Page CRC checksum mismatch".to_string()),
+            Ok(()),
+            Ok(()),
+            Err("Parquet argument error: Parquet error: Page CRC checksum mismatch".to_string()),
+        ]
+    );
 }
 
 #[test]