From 9ca39f229f7d45282b69d37a066846f0bd881942 Mon Sep 17 00:00:00 2001 From: Shefeek Jinnah Date: Tue, 26 May 2026 13:48:41 +0530 Subject: [PATCH] fix: pass file_size to ParquetObjectReader to avoid page-index panic Without an explicit file_size, ParquetObjectReader falls back to the suffix-fetch metadata path, which mislabels `remainder_start` as 0 in parquet 58.1 and panics in `load_page_index_with_remainder` for any file containing page indexes. --- src/datafusion/src/reader/plantime/source.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/datafusion/src/reader/plantime/source.rs b/src/datafusion/src/reader/plantime/source.rs index 6c193c3f..4efb9788 100644 --- a/src/datafusion/src/reader/plantime/source.rs +++ b/src/datafusion/src/reader/plantime/source.rs @@ -59,7 +59,11 @@ impl CachedMetaReaderFactory { ) -> ParquetMetadataCacheReader { let path = partitioned_file.object_meta.location.clone(); let store = Arc::clone(&self.store); - let mut inner = ParquetObjectReader::new(store, path.clone()); + // Pass file_size to take the size-aware metadata path; parquet 58.1's + // suffix fallback mislabels `remainder_start` as 0 and panics when + // loading page indexes. + let mut inner = ParquetObjectReader::new(store, path.clone()) + .with_file_size(partitioned_file.object_meta.size); if let Some(hint) = metadata_size_hint { inner = inner.with_footer_size_hint(hint);