Skip to content

Commit 18fc8f9

Browse files
authored
HIVE-29538: AssertionError in StatefulFunctionsChecker when compiling queries with NVL/COALESCE/IF over field access on ARRAY<STRUCT> (#6402)
1 parent 2554db0 commit 18fc8f9

4 files changed

Lines changed: 71 additions & 8 deletions

File tree

ql/src/java/org/apache/hadoop/hive/ql/parse/type/HiveFunctionHelper.java

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -345,12 +345,7 @@ public Void visitCall(final RexCall call) {
       // doing this check.
       GenericUDF nodeUDF = SqlFunctionConverter.getHiveUDF(
           call.getOperator(), call.getType(), call.getOperands().size());
-      if (nodeUDF == null) {
-        throw new AssertionError("Cannot retrieve function " + call.getOperator().getName()
-            + " within StatefulFunctionsChecker");
-      }
-      // Stateful?
-      if (FunctionRegistry.isStateful(nodeUDF)) {
+      if (nodeUDF != null && FunctionRegistry.isStateful(nodeUDF)) {
         throw new Util.FoundOne(call);
       }
       return super.visitCall(call);

ql/src/test/org/apache/hadoop/hive/ql/parse/type/TestHiveFunctionHelper.java

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,18 @@
 package org.apache.hadoop.hive.ql.parse.type;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
 
 import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
 import org.apache.calcite.rel.type.RelDataTypeFactory;
 import org.apache.calcite.rex.RexBuilder;
 import org.apache.calcite.rex.RexCall;
 import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.fun.SqlStdOperatorTable;
+import org.apache.hadoop.hive.ql.exec.FunctionInfo;
+import org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveComponentAccess;
 import org.apache.hadoop.hive.ql.parse.SemanticException;
 
 import com.google.common.collect.Lists;
@@ -41,9 +46,9 @@ public void testGetUDTFFunction() throws SemanticException {
     RelDataTypeFactory typeFactory = new JavaTypeFactoryImpl();
     RexBuilder rexBuilder = new RexBuilder(typeFactory);
     List<RexNode> operands =
-        Lists.newArrayList(rexBuilder.makeLiteral("hello"), rexBuilder.makeLiteral("world"));
+        Lists.newArrayList(rexBuilder.makeLiteral("hello"), rexBuilder.makeLiteral("world"));
     List<RexNode> arrayNode =
-        Lists.newArrayList(rexBuilder.makeCall(SqlStdOperatorTable.ARRAY_VALUE_CONSTRUCTOR, operands));
+        Lists.newArrayList(rexBuilder.makeCall(SqlStdOperatorTable.ARRAY_VALUE_CONSTRUCTOR, operands));
 
     FunctionHelper functionHelper = new HiveFunctionHelper(rexBuilder);
     RexCall explodeNode = (RexCall) functionHelper.getUDTFFunction("explode", arrayNode);
@@ -62,4 +67,28 @@ public void testGetUDTFFunctionThrowingException() throws SemanticException {
     // 'upper' is not a udtf so should throw exception
     functionHelper.getUDTFFunction("upper", operands);
   }
+
+  @Test
+  public void testCoalesceWithComponentAccessDoesNotAssert() throws SemanticException {
+    RelDataTypeFactory typeFactory = new JavaTypeFactoryImpl();
+    RexBuilder rexBuilder = new RexBuilder(typeFactory);
+    FunctionHelper functionHelper = new HiveFunctionHelper(rexBuilder);
+
+    // Simulate nested field access over a collection, which is represented in Calcite by
+    // a COMPONENT_ACCESS operator and can appear inside NVL/COALESCE rewrites to CASE.
+    RexNode array =
+        rexBuilder.makeCall(SqlStdOperatorTable.ARRAY_VALUE_CONSTRUCTOR,
+            Lists.newArrayList(rexBuilder.makeLiteral("hello")));
+    RexNode componentAccess =
+        rexBuilder.makeCall(array.getType().getComponentType(), HiveComponentAccess.COMPONENT_ACCESS,
+            Lists.newArrayList(array));
+
+    FunctionInfo fi = functionHelper.getFunctionInfo("coalesce");
+    List<RexNode> inputs = Lists.newArrayList(componentAccess, rexBuilder.makeNullLiteral(componentAccess.getType()));
+    RexNode expr = functionHelper.getExpression("coalesce", fi, inputs, componentAccess.getType());
+    assertNotNull(expr);
+    assertTrue(expr instanceof RexCall);
+    // COALESCE is rewritten to CASE; the stateful-functions checker walks this tree.
+    assertEquals(SqlKind.CASE, ((RexCall) expr).getOperator().getKind());
+  }
 }
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
+-- HIVE-29538: AssertionError in StatefulFunctionsChecker when Calcite introduces COMPONENT_ACCESS
+
+-- Single-level nesting: array-of-struct field projection uses COMPONENT_ACCESS.
+CREATE TABLE cbo_component_access_if_tbl (
+  `jobs` array<struct<code:string>>
+) STORED AS ORC;
+
+-- `if(...)` is rewritten to CASE and triggers checkForStatefulFunctions.
+EXPLAIN CBO
+SELECT if(concat_ws(',', `jobs`.code) = '', null, concat_ws(',', `jobs`.code)) AS codes
+FROM cbo_component_access_if_tbl;
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
PREHOOK: query: CREATE TABLE cbo_component_access_if_tbl (
2+
`jobs` array<struct<code:string>>
3+
) STORED AS ORC
4+
PREHOOK: type: CREATETABLE
5+
PREHOOK: Output: database:default
6+
PREHOOK: Output: default@cbo_component_access_if_tbl
7+
POSTHOOK: query: CREATE TABLE cbo_component_access_if_tbl (
8+
`jobs` array<struct<code:string>>
9+
) STORED AS ORC
10+
POSTHOOK: type: CREATETABLE
11+
POSTHOOK: Output: database:default
12+
POSTHOOK: Output: default@cbo_component_access_if_tbl
13+
PREHOOK: query: EXPLAIN CBO
14+
SELECT if(concat_ws(',', `jobs`.code) = '', null, concat_ws(',', `jobs`.code)) AS codes
15+
FROM cbo_component_access_if_tbl
16+
PREHOOK: type: QUERY
17+
PREHOOK: Input: default@cbo_component_access_if_tbl
18+
#### A masked pattern was here ####
19+
POSTHOOK: query: EXPLAIN CBO
20+
SELECT if(concat_ws(',', `jobs`.code) = '', null, concat_ws(',', `jobs`.code)) AS codes
21+
FROM cbo_component_access_if_tbl
22+
POSTHOOK: type: QUERY
23+
POSTHOOK: Input: default@cbo_component_access_if_tbl
24+
#### A masked pattern was here ####
25+
CBO PLAN:
26+
HiveProject(codes=[CASE(=(concat_ws(_UTF-16LE',':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", COMPONENT_ACCESS($0).code), _UTF-16LE''), null:VARCHAR(2147483647) CHARACTER SET "UTF-16LE", concat_ws(_UTF-16LE',':VARCHAR(2147483647) CHARACTER SET "UTF-16LE", COMPONENT_ACCESS($0).code))])
27+
HiveTableScan(table=[[default, cbo_component_access_if_tbl]], table:alias=[cbo_component_access_if_tbl])
28+

0 commit comments

Comments
 (0)