From e306c82732eb09e55b4cc7693f4edda520310278 Mon Sep 17 00:00:00 2001 From: Thomas Rebele Date: Fri, 26 Jun 2026 10:26:15 +0200 Subject: [PATCH] HIVE-29688: HiveRelDecorrelator does not support semi-joins with correlation variables only in LHS --- .../calcite/rules/HiveRelDecorrelator.java | 23 ++++++--- .../clientpositive/decorrelate-semi-join.q | 17 +++++++ .../llap/decorrelate-semi-join.q.out | 51 +++++++++++++++++++ 3 files changed, 84 insertions(+), 7 deletions(-) create mode 100644 ql/src/test/queries/clientpositive/decorrelate-semi-join.q create mode 100644 ql/src/test/results/clientpositive/llap/decorrelate-semi-join.q.out diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java index 7769191e5d8a..9c83c3894a55 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelDecorrelator.java @@ -1353,7 +1353,7 @@ public Frame decorrelateRel(Join rel) { throw new UnsupportedOperationException("Correlated subqueries in outer join conditions not supported yet." 
+ " Join condition: " + rel.getCondition()); } - if (!rel.getJoinType().projectsRight()) { + if (JoinRelType.ANTI.equals(rel.getJoinType())) { return decorrelateRel((RelNode) rel); } // @@ -1374,6 +1374,11 @@ public Frame decorrelateRel(Join rel) { return null; } + if (!rel.getJoinType().projectsRight() && !rightFrame.corDefOutputs.isEmpty()) { + // semi-join (RHS not projected) with correlation variables in the RHS is not supported yet + return null; + } + final RelNode newJoin = HiveJoin.getJoin(rel.getCluster(), leftFrame.r, rightFrame.r, decorrelateExpr(rel.getCondition()), rel.getJoinType()); @@ -1385,16 +1390,20 @@ public Frame decorrelateRel(Join rel) { int newLeftFieldCount = leftFrame.r.getRowType().getFieldCount(); int oldRightFieldCount = oldRight.getRowType().getFieldCount(); - assert rel.getRowType().getFieldCount() - == oldLeftFieldCount + oldRightFieldCount; + + int expectedFieldCount = oldLeftFieldCount + (rel.getJoinType().projectsRight() ? oldRightFieldCount : 0); + if (rel.getRowType().getFieldCount() != expectedFieldCount) + throw new AssertionError(); // Left input positions are not changed. mapOldToNewOutputs.putAll(leftFrame.oldToNewOutputs); - // Right input positions are shifted by newLeftFieldCount. - for (int i = 0; i < oldRightFieldCount; i++) { - mapOldToNewOutputs.put(i + oldLeftFieldCount, - rightFrame.oldToNewOutputs.get(i) + newLeftFieldCount); + if (rel.getJoinType().projectsRight()) { + // Right input positions are shifted by newLeftFieldCount. 
+ for (int i = 0; i < oldRightFieldCount; i++) { + mapOldToNewOutputs.put(i + oldLeftFieldCount, + rightFrame.oldToNewOutputs.get(i) + newLeftFieldCount); + } } final SortedMap corDefOutputs = diff --git a/ql/src/test/queries/clientpositive/decorrelate-semi-join.q b/ql/src/test/queries/clientpositive/decorrelate-semi-join.q new file mode 100644 index 000000000000..d4069f0fdf86 --- /dev/null +++ b/ql/src/test/queries/clientpositive/decorrelate-semi-join.q @@ -0,0 +1,17 @@ +DROP TABLE IF EXISTS `tab`; +CREATE EXTERNAL TABLE `tab`( + `f1` string, + `f2` string, + `f3` string, + `f4` string, + `f5` string, + `f6` string); + +SELECT 1 +FROM tab a +WHERE a.f4 IN ('1', '2') +AND EXISTS ( + SELECT 1 + FROM tab b + WHERE a.f6 = b.f1 AND b.f3 IN (SELECT 1) +); diff --git a/ql/src/test/results/clientpositive/llap/decorrelate-semi-join.q.out b/ql/src/test/results/clientpositive/llap/decorrelate-semi-join.q.out new file mode 100644 index 000000000000..3f21b9c24ea9 --- /dev/null +++ b/ql/src/test/results/clientpositive/llap/decorrelate-semi-join.q.out @@ -0,0 +1,51 @@ +PREHOOK: query: DROP TABLE IF EXISTS `tab` +PREHOOK: type: DROPTABLE +PREHOOK: Output: database:default +POSTHOOK: query: DROP TABLE IF EXISTS `tab` +POSTHOOK: type: DROPTABLE +POSTHOOK: Output: database:default +PREHOOK: query: CREATE EXTERNAL TABLE `tab`( + `f1` string, + `f2` string, + `f3` string, + `f4` string, + `f5` string, + `f6` string) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@tab +POSTHOOK: query: CREATE EXTERNAL TABLE `tab`( + `f1` string, + `f2` string, + `f3` string, + `f4` string, + `f5` string, + `f6` string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@tab +Warning: Shuffle Join MERGEJOIN[29][tables = [$hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT 1 +FROM tab a +WHERE a.f4 IN ('1', '2') +AND EXISTS ( + SELECT 1 + FROM tab b + WHERE a.f6 = b.f1 AND b.f3 IN (SELECT 1) +) 
+PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Input: default@tab +#### A masked pattern was here #### +POSTHOOK: query: SELECT 1 +FROM tab a +WHERE a.f4 IN ('1', '2') +AND EXISTS ( + SELECT 1 + FROM tab b + WHERE a.f6 = b.f1 AND b.f3 IN (SELECT 1) +) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Input: default@tab +#### A masked pattern was here ####