Skip to content
Draft

Len str #61707

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.apache.doris.nereids.trees.plans.physical.PhysicalProject;
import org.apache.doris.nereids.trees.plans.physical.PhysicalTVFRelation;
import org.apache.doris.nereids.trees.plans.physical.PhysicalTopN;
import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter;
import org.apache.doris.qe.SessionVariable;

import com.google.common.collect.BiMap;
Expand All @@ -56,39 +57,27 @@
* post rule to do lazy materialize
*/
public class LazyMaterializeTopN extends PlanPostProcessor {
/* BE do not support pattern:
union
-->materialize
-->topn
-->scan1
-->materialize
-->topn
-->scan2
when we create materializeNode for the first union child, set hasMaterialized=true
to avoid generating materializeNode for other union's children
*/
private static final Logger LOG = LogManager.getLogger(LazyMaterializeTopN.class);
private boolean hasMaterialized = false;

@Override
public Plan visitPhysicalTopN(PhysicalTopN topN, CascadesContext ctx) {
// Visit children first (bottom-up) so that TopN nodes under union are processed independently
Plan topNWithNewChildren = DefaultPlanRewriter.visitChildren(this, topN, ctx);
PhysicalTopN topNToProcess = (PhysicalTopN) topNWithNewChildren;
try {
Plan result = computeTopN(topN, ctx);
Plan result = computeTopN(topNToProcess, ctx);
if (SessionVariable.isFeDebug()) {
Validator validator = new Validator();
validator.processRoot(result, ctx);
}
return result;
} catch (Exception e) {
LOG.warn("lazy materialize topn failed", e);
return topN;
return topNToProcess;
}
}

private Plan computeTopN(PhysicalTopN topN, CascadesContext ctx) {
if (hasMaterialized) {
return topN;
}
if (SessionVariable.getTopNLazyMaterializationThreshold() < topN.getLimit()) {
return topN;
}
Expand Down Expand Up @@ -183,7 +172,6 @@ private Plan computeTopN(PhysicalTopN topN, CascadesContext ctx) {
result = new PhysicalLazyMaterialize(result, result.getOutput(),
materializedSlots, relationToLazySlotMap, relationToRowId, materializeMap,
null, ((AbstractPlan) result).getStats());
hasMaterialized = true;
} else {
/*
topn
Expand All @@ -206,7 +194,6 @@ private Plan computeTopN(PhysicalTopN topN, CascadesContext ctx) {
result = new PhysicalLazyMaterialize(result, materializeInput,
reOrderedMaterializedSlots, relationToLazySlotMap, relationToRowId, materializeMap,
null, ((AbstractPlan) result).getStats());
hasMaterialized = true;
}
result = new PhysicalProject(originOutput, null, result);
return result;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import org.apache.doris.nereids.trees.expressions.functions.scalar.ArraySplit;
import org.apache.doris.nereids.trees.expressions.functions.scalar.ElementAt;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Lambda;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Length;
import org.apache.doris.nereids.trees.expressions.functions.scalar.MapContainsEntry;
import org.apache.doris.nereids.trees.expressions.functions.scalar.MapContainsKey;
import org.apache.doris.nereids.trees.expressions.functions.scalar.MapContainsValue;
Expand Down Expand Up @@ -127,9 +128,42 @@ public Void visitSlotReference(SlotReference slotReference, CollectorContext con
int slotId = slotReference.getExprId().asInt();
slotToAccessPaths.put(slotId, new CollectAccessPathResult(path, context.bottomFilter, context.type));
}
if (dataType.isStringLikeType()) {
int slotId = slotReference.getExprId().asInt();
if (!context.accessPathBuilder.isEmpty()) {
// Accessed via an offset-only function (e.g. length()).
// The builder already has AccessPathInfo.ACCESS_STRING_OFFSET ("OFFSET") at the tail;
// add the column name as prefix.
context.accessPathBuilder.addPrefix(slotReference.getName());
ImmutableList<String> path = ImmutableList.copyOf(context.accessPathBuilder.accessPath);
slotToAccessPaths.put(slotId,
new CollectAccessPathResult(path, context.bottomFilter, TAccessPathType.DATA));
} else {
// Direct access to the string column → record a DATA path so that any
// concurrent offset-only path for the same slot is suppressed.
List<String> path = ImmutableList.of(slotReference.getName());
slotToAccessPaths.put(slotId,
new CollectAccessPathResult(path, context.bottomFilter, TAccessPathType.DATA));
}
}
return null;
}

/**
 * Collects the access path for a {@code length(...)} call.
 *
 * <p>If the argument is string-like and no access path has been accumulated in
 * {@code context} yet, the argument is visited with a new context whose path builder
 * carries {@link AccessPathInfo#ACCESS_STRING_OFFSET} as its suffix, so a plain string slot
 * ends up with a path such as ["str_col", "OFFSET"]. In every other case the call is
 * handed to the generic {@code visit}.
 */
@Override
public Void visitLength(Length length, CollectorContext context) {
    Expression operand = length.child(0);
    boolean offsetOnlyCandidate = operand.getDataType().isStringLikeType()
            && context.accessPathBuilder.isEmpty();
    if (!offsetOnlyCandidate) {
        // Not a bare string argument, or we are already inside another access path:
        // use the default handling.
        return visit(length, context);
    }
    // Start a new path that ends with the offset marker; visiting the argument
    // will prepend the remaining path components.
    CollectorContext offsetOnlyContext =
            new CollectorContext(context.statementContext, context.bottomFilter);
    offsetOnlyContext.accessPathBuilder.addSuffix(AccessPathInfo.ACCESS_STRING_OFFSET);
    return operand.accept(this, offsetOnlyContext);
}

@Override
public Void visitArrayItemSlot(ArrayItemSlot arrayItemSlot, CollectorContext context) {
if (nameToLambdaArguments.isEmpty()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ public class AccessPathInfo {
public static final String ACCESS_ALL = "*";
public static final String ACCESS_MAP_KEYS = "KEYS";
public static final String ACCESS_MAP_VALUES = "VALUES";
// Suffix appended to a string-column path to indicate that only the offset array
// (not the char data) is needed — agreed with BE as the special path component name.
public static final String ACCESS_STRING_OFFSET = "OFFSET";

private DataType prunedType;
// allAccessPaths is used to record all access path include predicate access path and non-predicate access path,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,9 @@ public Map<Slot, List<CollectAccessPathResult>> collect(Plan root, StatementCont
}

private boolean shouldCollectAccessPath(Slot slot) {
return slot.getDataType() instanceof NestedColumnPrunable || slot.getDataType().isVariantType();
return slot.getDataType() instanceof NestedColumnPrunable
|| slot.getDataType().isVariantType()
|| slot.getDataType().isStringLikeType();
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.trees.expressions.SlotReference;
import org.apache.doris.nereids.trees.expressions.functions.scalar.Length;
import org.apache.doris.nereids.trees.plans.Plan;
import org.apache.doris.nereids.trees.plans.visitor.CustomRewriter;
import org.apache.doris.nereids.types.ArrayType;
Expand Down Expand Up @@ -80,7 +81,9 @@ public Plan rewriteRoot(Plan plan, JobContext jobContext) {
StatementContext statementContext = jobContext.getCascadesContext().getStatementContext();
SessionVariable sessionVariable = statementContext.getConnectContext().getSessionVariable();
if (!sessionVariable.enablePruneNestedColumns
|| (!statementContext.hasNestedColumns() && !containsVariant(plan))) {
|| (!statementContext.hasNestedColumns()
&& !containsVariant(plan)
&& !(sessionVariable.enableSubColumnMetaAccess && containsStringLength(plan)))) {
return plan;
}

Expand All @@ -104,6 +107,38 @@ public Plan rewriteRoot(Plan plan, JobContext jobContext) {
}
}

/**
 * Tells whether any node of the plan tree carries an expression in which length() is
 * applied to a string-like argument.
 *
 * <p>Once a match has been recorded, the remaining nodes visited by {@code foreachUp}
 * are skipped without inspecting their expressions.
 *
 * @param plan root of the plan tree to scan
 * @return true if at least one such length() call was found
 */
private static boolean containsStringLength(Plan plan) {
    AtomicBoolean hit = new AtomicBoolean(false);
    plan.foreachUp(treeNode -> {
        if (!hit.get()) {
            for (Expression candidate : ((Plan) treeNode).getExpressions()) {
                if (expressionContainsStringLength(candidate)) {
                    hit.set(true);
                    break;
                }
            }
        }
    });
    return hit.get();
}

/**
 * Depth-first search for a {@code Length} call whose first argument is string-like.
 *
 * @param expr expression tree to inspect
 * @return true if {@code expr} itself or any of its descendants is such a call
 */
private static boolean expressionContainsStringLength(Expression expr) {
    boolean matched = expr instanceof Length
            && expr.child(0).getDataType().isStringLikeType();
    if (!matched) {
        // Check the node itself first, then its children in order; stop at the first hit.
        for (Expression operand : expr.children()) {
            if (expressionContainsStringLength(operand)) {
                matched = true;
                break;
            }
        }
    }
    return matched;
}

private static boolean containsVariant(Plan plan) {
AtomicBoolean hasVariant = new AtomicBoolean(false);
plan.foreachUp(node -> {
Expand Down Expand Up @@ -183,6 +218,18 @@ private static Map<Integer, AccessPathInfo> pruneDataType(
DataTypeAccessTree accessTree = kv.getValue();
DataType prunedDataType = accessTree.pruneDataType().orElse(slot.getDataType());

if (slot.getDataType().isStringLikeType()) {
if (accessTree.hasStringOffsetOnlyAccess()) {
// Offset-only access (e.g. length(str_col)): type stays varchar,
// but we must still send the access path to BE so it skips the char data.
List<TColumnAccessPath> allPaths = buildColumnAccessPaths(slot, allAccessPaths);
result.put(slot.getExprId().asInt(),
new AccessPathInfo(slot.getDataType(), allPaths, new ArrayList<>()));
}
// direct access (accessAll=true) or other: skip — no type change, no access paths needed.
continue;
}

List<TColumnAccessPath> allPaths = buildColumnAccessPaths(slot, allAccessPaths);
result.put(slot.getExprId().asInt(),
new AccessPathInfo(prunedDataType, allPaths, new ArrayList<>()));
Expand All @@ -202,15 +249,19 @@ private static Map<Integer, AccessPathInfo> pruneDataType(
List<TColumnAccessPath> predicatePaths =
buildColumnAccessPaths(slot, predicateAccessPaths);
AccessPathInfo accessPathInfo = result.get(slot.getExprId().asInt());
accessPathInfo.getPredicateAccessPaths().addAll(predicatePaths);
if (accessPathInfo != null) {
accessPathInfo.getPredicateAccessPaths().addAll(predicatePaths);
}
}

for (Entry<Slot, DataType> kv : variantSlots.entrySet()) {
Slot slot = kv.getKey();
List<TColumnAccessPath> predicatePaths =
buildColumnAccessPaths(slot, predicateAccessPaths);
AccessPathInfo accessPathInfo = result.get(slot.getExprId().asInt());
accessPathInfo.getPredicateAccessPaths().addAll(predicatePaths);
if (accessPathInfo != null) {
accessPathInfo.getPredicateAccessPaths().addAll(predicatePaths);
}
}

return result;
Expand Down Expand Up @@ -271,6 +322,11 @@ public static class DataTypeAccessTree {
// if access 's.a.b' the node 's' and 'a' has accessPartialChild, and node 'b' has accessAll
private boolean accessPartialChild;
private boolean accessAll;
// True when this string-typed node is accessed ONLY via the offset array
// (e.g. length(str_col) or length(element_at(c_struct,'f3'))).
// When this flag is set and accessAll is NOT set, pruneDataType() returns Optional.empty(),
// so the slot keeps its original string type; the access path alone tells the BE
// to read only the offset array, not the chars data.
private boolean isStringOffsetOnly;
// for the future, only access the meta of the column,
// e.g. `is not null` can only access the column's offset, not need to read the data
private TAccessPathType pathType;
Expand Down Expand Up @@ -312,6 +368,16 @@ public Map<String, DataTypeAccessTree> getChildren() {
return children;
}

/**
 * Reports whether the string column is reached only through its offset array
 * (e.g. length(str_col)): the node is string-like, flagged offset-only, and not fully accessed.
 *
 * <p>On the root node the check is applied to the first child returned by the
 * {@code children} map; on any other node it is applied to the node itself.
 */
public boolean hasStringOffsetOnlyAccess() {
    DataTypeAccessTree target = isRoot ? children.values().iterator().next() : this;
    return target.type.isStringLikeType() && target.isStringOffsetOnly && !target.accessAll;
}

/** pruneCastType */
public DataType pruneCastType(DataTypeAccessTree origin, DataTypeAccessTree cast) {
if (type instanceof StructType) {
Expand Down Expand Up @@ -447,6 +513,16 @@ public void setAccessByPath(List<String> path, int accessIndex, TAccessPathType
valuesChild.setAccessByPath(path, accessIndex + 1, pathType);
return;
}
} else if (type.isStringLikeType()) {
// String leaf accessed via the offset array, i.e. the current path component is
// AccessPathInfo.ACCESS_STRING_OFFSET. Mark it offset-only: the slot keeps its string
// type and pruneDataType() returns Optional.empty() for it.
if (path.get(accessIndex).equals(AccessPathInfo.ACCESS_STRING_OFFSET)) {
isStringOffsetOnly = true;
return; // do NOT set accessAll — offset-only is distinguishable from full access
}
// Any other sub-path on a string column means full data is needed.
accessAll = true;
return;
} else if (isRoot) {
children.get(path.get(accessIndex).toLowerCase()).setAccessByPath(path, accessIndex + 1, pathType);
return;
Expand Down Expand Up @@ -484,6 +560,10 @@ public Optional<DataType> pruneDataType() {
return children.values().iterator().next().pruneDataType();
} else if (accessAll) {
return Optional.of(type);
} else if (isStringOffsetOnly) {
// Only the offset array is accessed (e.g. length(str_col)).
// The slot type stays unchanged (varchar); the access path tells BE to skip char data.
return Optional.empty();
} else if (!accessPartialChild) {
return Optional.empty();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -724,7 +724,8 @@ private void tryRecordReplaceSlots(Plan plan, Object checkObj, Set<Integer> shou
for (Slot slot : output) {
int slotId = slot.getExprId().asInt();
if ((slot.getDataType() instanceof NestedColumnPrunable
|| slot.getDataType().isVariantType())
|| slot.getDataType().isVariantType()
|| slot.getDataType().isStringLikeType())
&& replacedDataTypes.containsKey(slotId)) {
shouldReplaceSlots.add(slotId);
shouldPrune = true;
Expand Down
12 changes: 12 additions & 0 deletions fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,7 @@ public class SessionVariable implements Serializable, Writable {
"enable_runtime_filter_partition_prune";

public static final String ENABLE_PRUNE_NESTED_COLUMN = "enable_prune_nested_column";
public static final String ENABLE_SUB_COLUMN_META_ACCESS = "enable_sub_column_meta_access";

static final String SESSION_CONTEXT = "session_context";

Expand Down Expand Up @@ -1683,6 +1684,17 @@ public enum IgnoreSplitType {
)
public boolean enablePruneNestedColumns = true;

// Experimental session variable "enable_sub_column_meta_access"; defaults to true and is
// declared with needForward = true and fuzzy = false.
// The nested-column pruning rewrite reads it in its early-exit guard together with
// containsStringLength(plan), so string-offset pruning for length() only runs when it is on.
@VariableMgr.VarAttr(name = ENABLE_SUB_COLUMN_META_ACCESS, needForward = true,
fuzzy = false,
varType = VariableAnnotation.EXPERIMENTAL,
description = {
"是否开启子列 meta 访问优化(如 length(str) 只读 offset、IS NULL 只读 null flag)",
"Whether to enable sub-column meta access optimization "
+ "(e.g. length(str) reads only offsets, IS NULL reads only null flags)"
}
)
public boolean enableSubColumnMetaAccess = true;

public boolean enableTopnLazyMaterialization() {
return ConnectContext.get() != null
&& ConnectContext.get().getSessionVariable().topNLazyMaterializationThreshold > 0;
Expand Down
Loading
Loading