From be35506e93574f4dfbe8a9f5ab06aed89be8998c Mon Sep 17 00:00:00 2001
From: Lipeng Zhu
Date: Wed, 27 May 2026 18:08:20 +0800
Subject: [PATCH] [VL] Skip escape arg when offloading Like with no-backslash pattern

Spark's Like node always carries escapeChar (defaulting to '\') even
when the SQL did not specify ESCAPE. Always sending the 3-arg form to
Velox forces makeLike (Re2Functions.cpp) onto the escape-aware path:
parsePattern runs an extra unescape pass and determinePatternKind runs
with escapeChar.has_value() == true, even when no escaping is needed.

When the pattern literal contains no '\', the 2-arg and 3-arg forms are
semantically identical, so emit the cheaper 2-arg form. Velox already
registers both signatures via likeSignatures().

Performance: TPC-H Q13 @ 6 TB shows >6% end-to-end latency reduction.
With the 3-arg form, the constant-pattern fast paths in
determinePatternKind are bypassed and Velox falls back to LikeWithRe2,
hot-looping in re2::DFA::InlinedSearchLoop (>8% of total cycles on
Q13). Sending the 2-arg form lets determinePatternKind dispatch
'%special%requests%' to OptimizedLike, eliminating the RE2 DFA cost.

Generated-by: Claude claude-opus-4-7 Co-authored-by: Guo Wangyang Co-authored-by: Hengrui Hu Signed-off-by: Lipeng Zhu --- .../backendsapi/velox/VeloxSparkPlanExecApi.scala | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala index b7a1e172b2c..5eb70b49e4d 100644 --- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala +++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/VeloxSparkPlanExecApi.scala @@ -263,10 +263,15 @@ class VeloxSparkPlanExecApi extends SparkPlanExecApi with Logging { left: ExpressionTransformer, right: ExpressionTransformer, original: Like): ExpressionTransformer = { - GenericExpressionTransformer( - substraitExprName, - Seq(left, right, LiteralTransformer(original.escapeChar)), - original) + original match { + case Like(_, r: Literal, '\\') if !r.value.toString.contains('\\') => + GenericExpressionTransformer(substraitExprName, Seq(left, right), original) + case _ => + GenericExpressionTransformer( + substraitExprName, + Seq(left, right, LiteralTransformer(original.escapeChar)), + original) + } } /** Transform make_timestamp to Substrait. */