@@ -27,12 +27,12 @@ use crate::PhysicalOptimizerRule;
2727use datafusion_common:: config:: ConfigOptions ;
2828use datafusion_common:: error:: Result ;
2929use datafusion_common:: tree_node:: { Transformed , TransformedResult , TreeNode } ;
30- use datafusion_common:: { internal_err, JoinSide , JoinType } ;
30+ use datafusion_common:: { internal_err, DataFusionError , JoinSide , JoinType } ;
3131use datafusion_expr_common:: sort_properties:: SortProperties ;
3232use datafusion_physical_expr:: expressions:: Column ;
3333use datafusion_physical_expr:: LexOrdering ;
3434use datafusion_physical_plan:: execution_plan:: EmissionType ;
35- use datafusion_physical_plan:: joins:: utils:: ColumnIndex ;
35+ use datafusion_physical_plan:: joins:: utils:: { check_join_is_valid , ColumnIndex } ;
3636use datafusion_physical_plan:: joins:: {
3737 CrossJoinExec , GraceHashJoinExec , HashJoinExec , NestedLoopJoinExec , PartitionMode ,
3838 StreamJoinPartitionMode , SymmetricHashJoinExec ,
@@ -173,6 +173,13 @@ pub(crate) fn try_collect_left(
173173 let left = hash_join. left ( ) ;
174174 let right = hash_join. right ( ) ;
175175
176+ // Skip collect-left rewrite if the join currently has inconsistent schemas (e.g. required
177+ // columns were projected away temporarily). This mirrors the legacy hash join behavior where
178+ // collect-left is only attempted once the join inputs are fully valid.
179+ if check_join_is_valid ( & left. schema ( ) , & right. schema ( ) , hash_join. on ( ) ) . is_err ( ) {
180+ return Ok ( None ) ;
181+ }
182+
176183 let left_can_collect = ignore_threshold
177184 || supports_collect_by_thresholds (
178185 & * * left,
@@ -191,33 +198,23 @@ pub(crate) fn try_collect_left(
191198 if hash_join. join_type ( ) . supports_swap ( )
192199 && should_swap_join_order ( & * * left, & * * right) ?
193200 {
194- Ok ( Some ( hash_join. swap_inputs ( PartitionMode :: CollectLeft ) ?) )
201+ match hash_join. swap_inputs ( PartitionMode :: CollectLeft ) {
202+ Ok ( plan) => Ok ( Some ( plan) ) ,
203+ Err ( err) if is_missing_join_columns ( & err) => Ok ( None ) ,
204+ Err ( err) => Err ( err) ,
205+ }
195206 } else {
196- Ok ( Some ( Arc :: new ( HashJoinExec :: try_new (
197- Arc :: clone ( left) ,
198- Arc :: clone ( right) ,
199- hash_join. on ( ) . to_vec ( ) ,
200- hash_join. filter ( ) . cloned ( ) ,
201- hash_join. join_type ( ) ,
202- hash_join. projection . clone ( ) ,
203- PartitionMode :: CollectLeft ,
204- hash_join. null_equality ( ) ,
205- ) ?) ) )
207+ build_collect_left_exec ( hash_join, left, right)
206208 }
207209 }
208- ( true , false ) => Ok ( Some ( Arc :: new ( HashJoinExec :: try_new (
209- Arc :: clone ( left) ,
210- Arc :: clone ( right) ,
211- hash_join. on ( ) . to_vec ( ) ,
212- hash_join. filter ( ) . cloned ( ) ,
213- hash_join. join_type ( ) ,
214- hash_join. projection . clone ( ) ,
215- PartitionMode :: CollectLeft ,
216- hash_join. null_equality ( ) ,
217- ) ?) ) ) ,
210+ ( true , false ) => build_collect_left_exec ( hash_join, left, right) ,
218211 ( false , true ) => {
219212 if hash_join. join_type ( ) . supports_swap ( ) {
220- hash_join. swap_inputs ( PartitionMode :: CollectLeft ) . map ( Some )
213+ match hash_join. swap_inputs ( PartitionMode :: CollectLeft ) {
214+ Ok ( plan) => Ok ( Some ( plan) ) ,
215+ Err ( err) if is_missing_join_columns ( & err) => Ok ( None ) ,
216+ Err ( err) => Err ( err) ,
217+ }
221218 } else {
222219 Ok ( None )
223220 }
@@ -226,6 +223,35 @@ pub(crate) fn try_collect_left(
226223 }
227224}
228225
226+ fn is_missing_join_columns ( err : & DataFusionError ) -> bool {
227+ matches ! (
228+ err,
229+ DataFusionError :: Plan ( msg)
230+ if msg. contains( "The left or right side of the join does not have all columns" )
231+ )
232+ }
233+
234+ fn build_collect_left_exec (
235+ hash_join : & HashJoinExec ,
236+ left : & Arc < dyn ExecutionPlan > ,
237+ right : & Arc < dyn ExecutionPlan > ,
238+ ) -> Result < Option < Arc < dyn ExecutionPlan > > > {
239+ match HashJoinExec :: try_new (
240+ Arc :: clone ( left) ,
241+ Arc :: clone ( right) ,
242+ hash_join. on ( ) . to_vec ( ) ,
243+ hash_join. filter ( ) . cloned ( ) ,
244+ hash_join. join_type ( ) ,
245+ hash_join. projection . clone ( ) ,
246+ PartitionMode :: CollectLeft ,
247+ hash_join. null_equality ( ) ,
248+ ) {
249+ Ok ( exec) => Ok ( Some ( Arc :: new ( exec) ) ) ,
250+ Err ( err) if is_missing_join_columns ( & err) => Ok ( None ) ,
251+ Err ( err) => Err ( err) ,
252+ }
253+ }
254+
229255/// Creates a partitioned hash join execution plan, swapping inputs if beneficial.
230256///
231257/// Checks if the join order should be swapped based on the join type and input statistics.
0 commit comments