@@ -57,6 +57,8 @@ impl MaterializationPlan {
5757 filter_field_names : & BTreeSet < FieldName > ,
5858 ) -> Self {
5959 let projected_row_bytes = estimate_field_mask_row_bytes ( dtype, projection_field_mask) ;
60+ let projection_aligned_splits =
61+ filter_present && projection_masks_include_wide_fields ( dtype, projection_field_mask) ;
6062 if !filter_present {
6163 return Self :: Monolithic {
6264 projected_row_bytes,
@@ -67,20 +69,20 @@ impl MaterializationPlan {
6769 let Some ( final_fields) = simple_root_projection_fields ( projection, dtype) else {
6870 return Self :: Monolithic {
6971 projected_row_bytes,
70- projection_aligned_splits : false ,
72+ projection_aligned_splits,
7173 } ;
7274 } ;
7375 if final_fields. is_empty ( ) || !final_fields. iter ( ) . all_unique ( ) {
7476 return Self :: Monolithic {
7577 projected_row_bytes,
76- projection_aligned_splits : false ,
78+ projection_aligned_splits,
7779 } ;
7880 }
7981
8082 let Some ( struct_fields) = dtype. as_struct_fields_opt ( ) else {
8183 return Self :: Monolithic {
8284 projected_row_bytes,
83- projection_aligned_splits : false ,
85+ projection_aligned_splits,
8486 } ;
8587 } ;
8688 if final_fields. len ( ) == struct_fields. nfields ( )
@@ -101,7 +103,7 @@ impl MaterializationPlan {
101103 let Some ( field_dtype) = struct_fields. field ( name) else {
102104 return Self :: Monolithic {
103105 projected_row_bytes,
104- projection_aligned_splits : false ,
106+ projection_aligned_splits,
105107 } ;
106108 } ;
107109
@@ -133,15 +135,15 @@ impl MaterializationPlan {
133135 if deferred_groups. is_empty ( ) {
134136 return Self :: Monolithic {
135137 projected_row_bytes,
136- projection_aligned_splits : false ,
138+ projection_aligned_splits,
137139 } ;
138140 }
139141
140142 let total_carry_cost = immediate_carry_cost. saturating_add ( deferred_carry_cost) ;
141143 if total_carry_cost == 0 || deferred_carry_cost. saturating_mul ( 2 ) < total_carry_cost {
142144 return Self :: Monolithic {
143145 projected_row_bytes,
144- projection_aligned_splits : false ,
146+ projection_aligned_splits,
145147 } ;
146148 }
147149
@@ -220,6 +222,38 @@ impl DeferredFieldGroup {
220222 }
221223}
222224
225+ fn projection_masks_include_wide_fields ( dtype : & DType , field_masks : & [ FieldMask ] ) -> bool {
226+ field_masks
227+ . iter ( )
228+ . any ( |mask| mask_targets_wide_field ( dtype, mask) )
229+ }
230+
231+ fn mask_targets_wide_field ( dtype : & DType , field_mask : & FieldMask ) -> bool {
232+ match field_mask {
233+ FieldMask :: All => true ,
234+ FieldMask :: Prefix ( path) | FieldMask :: Exact ( path) => {
235+ if path. is_root ( ) {
236+ return true ;
237+ }
238+
239+ path. resolve ( dtype. clone ( ) )
240+ . map ( |target| is_wide_projection_dtype ( & target) )
241+ . unwrap_or_else ( || is_wide_projection_dtype ( dtype) )
242+ }
243+ }
244+ }
245+
246+ fn is_wide_projection_dtype ( dtype : & DType ) -> bool {
247+ matches ! (
248+ dtype,
249+ DType :: Utf8 ( _)
250+ | DType :: Binary ( _)
251+ | DType :: List ( ..)
252+ | DType :: FixedSizeList ( ..)
253+ | DType :: Struct ( ..)
254+ )
255+ }
256+
223257fn simple_root_projection_fields ( projection : & Expression , dtype : & DType ) -> Option < FieldNames > {
224258 let struct_fields = dtype. as_struct_fields_opt ( ) ?;
225259 if projection. is :: < Root > ( ) {
@@ -237,14 +271,7 @@ fn simple_root_projection_fields(projection: &Expression, dtype: &DType) -> Opti
237271}
238272
239273fn should_defer_field ( dtype : & DType , row_cost_bytes : usize ) -> bool {
240- matches ! (
241- dtype,
242- DType :: Utf8 ( _)
243- | DType :: Binary ( _)
244- | DType :: List ( ..)
245- | DType :: FixedSizeList ( ..)
246- | DType :: Struct ( ..)
247- ) || row_cost_bytes > IMMEDIATE_FIELD_ROW_BYTES_THRESHOLD
274+ is_wide_projection_dtype ( dtype) || row_cost_bytes > IMMEDIATE_FIELD_ROW_BYTES_THRESHOLD
248275}
249276
250277pub ( crate ) fn estimate_field_mask_row_bytes ( dtype : & DType , field_masks : & [ FieldMask ] ) -> usize {
0 commit comments