Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
e0b6d21
JIT: PGO value-profiling for non-constant zero-init stackalloc
EgorBo May 8, 2026
f32bbff
Address Copilot reviewer feedback
EgorBo May 8, 2026
fe8750a
Add JitMetrics counters and tighten profiled LCLHEAP guard
EgorBo May 8, 2026
5a3d7d1
Refactor impDuplicateWithProfiledArg to use pickProfiledValue
EgorBo May 8, 2026
a712e10
Promote tiny-popular-value LCLHEAP fast paths to a local
EgorBo May 8, 2026
fec094d
Set Qmark then-branch likelihood from profiled likelihood
EgorBo May 8, 2026
305e88c
Skip LCLHEAP value-profile specialization for sizes <= 32 bytes
EgorBo May 8, 2026
1fcbcb2
clean up
EgorBo May 8, 2026
c1a0879
Address remaining Copilot reviewer feedback
EgorBo May 8, 2026
ea55d69
Use GTF_ORDER_SIDEEFF instead of GTF_EXCEPT for GT_LCLHEAP
EgorBo May 8, 2026
a57f9bd
Potential fix for pull request finding
EgorBo May 8, 2026
c1f31c6
Potential fix for pull request finding
EgorBo May 8, 2026
c7bf978
Use unsigned widening for value-profile length cast
EgorBo May 8, 2026
5bad7ef
Block GT_LCLHEAP if-conversion via cost rather than GTF_ORDER_SIDEEFF
EgorBo May 9, 2026
d2fbd77
[DO NOT MERGE] Disable global SkipLocalsInit default to stress CI
EgorBo May 9, 2026
1e17a51
[DO NOT MERGE] Fix CI: keep AllowUnsafeBlocks side-effect when stress…
EgorBo May 9, 2026
f4de676
clean up
EgorBo May 9, 2026
c1f7f86
Generalize value-profile guarded specialization into one helper
EgorBo May 9, 2026
c7f908e
Spill all side-effecting siblings via UseEdges() iterator
EgorBo May 9, 2026
6ead091
Address Copilot reviewer: gate call candidacy on non-null candidate info
EgorBo May 9, 2026
f392b2c
Fix CI: don't overwrite gtInlineCandidateInfo via union when marking …
EgorBo May 9, 2026
03c6f8f
Merge branch 'main' into lclheap-pgo-value-probing
EgorBo May 10, 2026
c6e3250
Update Directory.Build.targets
EgorBo May 10, 2026
06a25be
Merge branch 'main' into lclheap-pgo-value-probing
EgorBo May 10, 2026
b10e52d
Address remaining Copilot reviewer feedback
EgorBo May 10, 2026
51dee3a
Merge branch 'main' into lclheap-pgo-value-probing
EgorBo May 12, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3220,6 +3220,21 @@ class Compiler
GenTreeColon* gtNewColonNode(var_types type, GenTree* thenNode, GenTree* elseNode);
GenTreeQmark* gtNewQmarkNode(var_types type, GenTree* cond, GenTreeColon* colon);

GenTreeOpWithILOffset* gtNewLclHeapNode(GenTree* size, IL_OFFSET ilOffset);

bool pickProfiledValue(IL_OFFSET ilOffset, uint32_t* pLikelihood, ssize_t* pValue);

//------------------------------------------------------------------------
// IsValueHistogramProbeCandidate: Determine if a node is a value-histogram
// probe candidate, and report the IL offset and operand to profile.
//
// Centralized so the importer (impProfile*), the value-instrumentor visitor,
// schema-builder and probe-inserter all agree on which trees are value-probe
// candidates and where their profiled operand lives. To extend value
// profiling to a new node kind, add a case here.
//
// Arguments:
//    node          - the tree node to classify
//    ilOffset      - [out, optional] receives the IL offset associated with the
//                    candidate; may be nullptr if the caller does not need it
//    operandUseRef - [out, optional] receives the address of the use edge that
//                    holds the operand to profile, so the caller can replace
//                    the operand in place; may be nullptr
//
// Return Value:
//    true if the node is a value-histogram probe candidate, false otherwise.
//    The out parameters are only written when true is returned.
//
bool IsValueHistogramProbeCandidate(GenTree* node, IL_OFFSET* ilOffset = nullptr, GenTree*** operandUseRef = nullptr);

GenTree* gtNewLargeOperNode(genTreeOps oper,
var_types type = TYP_I_IMPL,
GenTree* op1 = nullptr,
Expand Down Expand Up @@ -5087,7 +5102,12 @@ class Compiler

GenTree* impFixupStructReturnType(GenTree* op);

GenTree* impDuplicateWithProfiledArg(GenTreeCall* call, IL_OFFSET ilOffset);
GenTree* impProfileValueGuardedTree(GenTree* node,
GenTree** operandRef,
IL_OFFSET ilOffset,
ssize_t minProfitable,
ssize_t maxProfitable,
int* successMetric DEBUGARG(const char* tmpName));

GenTree* impThrowIfNull(GenTreeCall* call);

Expand Down
138 changes: 106 additions & 32 deletions src/coreclr/jit/fgprofile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1893,6 +1893,78 @@ class HandleHistogramProbeVisitor final : public GenTreeVisitor<HandleHistogramP
}
};

//------------------------------------------------------------------------
// Compiler::IsValueHistogramProbeCandidate: decide whether a tree is a
//    value-histogram probe site and, if so, describe what to profile.
//
// Arguments:
//    node          - tree to classify
//    ilOffset      - [out, optional] IL offset recorded for the probe site
//    operandUseRef - [out, optional] address of the use edge of the profiled operand
//
// Return Value:
//    true for a GT_LCLHEAP with a non-constant size in a method that has
//    compInitMem set, or for a Memmove/SequenceEqual special intrinsic call
//    that carries histogram candidate info and is neither an inline nor a
//    guarded-devirtualization candidate; false otherwise. The out parameters
//    are written only when true is returned.
//
bool Compiler::IsValueHistogramProbeCandidate(GenTree* node, IL_OFFSET* ilOffset, GenTree*** operandUseRef)
{
    if (node->OperIs(GT_LCLHEAP))
    {
        // Keep this in sync with the gating at the impProfileValueGuardedTree call
        // site for CEE_LOCALLOC (see Compiler::impImportBlockCode): the consumer only
        // looks at zero-init locallocs with a non-constant size, so probing anything
        // else would just burn a schema slot on data nobody reads.
        const bool isProfiledLocalloc = info.compInitMem && !node->gtGetOp1()->OperIsConst();
        if (isProfiledLocalloc)
        {
            if (ilOffset != nullptr)
            {
                *ilOffset = node->AsOpWithILOffset()->GetILOffset();
            }
            if (operandUseRef != nullptr)
            {
                *operandUseRef = &node->AsOp()->gtOp1;
            }
        }
        return isProfiledLocalloc;
    }

    // Everything else that can be value-profiled is a special intrinsic call.
    if (!node->IsCall() || !node->AsCall()->IsSpecialIntrinsic())
    {
        return false;
    }

    GenTreeCall* const call = node->AsCall();

    // gtHandleHistogramProfileCandidateInfo shares a union with gtInlineCandidateInfo /
    // gtInlineCandidateInfoList / gtCallCookie / gtDirectCallAddress (see GenTreeCall in
    // gentree.h). For an inline or GDV candidate that slot holds inline metadata rather
    // than a HandleHistogramProfileCandidateInfo, so a non-null pointer would not be a
    // genuine value-profile marker. Reject such calls before touching the field.
    if (call->IsInlineCandidate() || call->IsGuardedDevirtualizationCandidate())
    {
        return false;
    }

    // A non-null candidate info is the importer's explicit "profile this site" marker.
    // Matching on the NamedIntrinsic alone would also pick up inlined
    // Memmove/SequenceEqual calls that the importer deliberately skipped, and the
    // visitor would later dereference their null candidate info.
    if (call->gtHandleHistogramProfileCandidateInfo == nullptr)
    {
        return false;
    }

    switch (lookupNamedIntrinsic(call->gtCallMethHnd))
    {
        case NI_System_SpanHelpers_Memmove:
        case NI_System_SpanHelpers_SequenceEqual:
            break;

        default:
            return false;
    }

    if (ilOffset != nullptr)
    {
        *ilOffset = call->gtHandleHistogramProfileCandidateInfo->ilOffset;
    }
    if (operandUseRef != nullptr)
    {
        // Both Memmove(dst, src, len) and SequenceEqual(left, right, len) take the
        // length as their third user argument; that is the value we profile.
        *operandUseRef = &call->gtArgs.GetUserArgByIndex(2)->EarlyNodeRef();
    }
    return true;
}

//------------------------------------------------------------------------
// ValueHistogramProbeVisitor: invoke functor on each node requiring a generic value probe
//
Expand All @@ -1918,13 +1990,9 @@ class ValueHistogramProbeVisitor final : public GenTreeVisitor<ValueHistogramPro
Compiler::fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
{
GenTree* const node = *use;
if (node->IsCall() && node->AsCall()->IsSpecialIntrinsic())
if (m_compiler->IsValueHistogramProbeCandidate(node))
{
const NamedIntrinsic ni = m_compiler->lookupNamedIntrinsic(node->AsCall()->gtCallMethHnd);
if ((ni == NI_System_SpanHelpers_Memmove) || (ni == NI_System_SpanHelpers_SequenceEqual))
{
m_functor(m_compiler, node);
}
m_functor(m_compiler, node);
}
return Compiler::WALK_CONTINUE;
}
Expand Down Expand Up @@ -2009,14 +2077,18 @@ class BuildValueHistogramProbeSchemaGen
{
}

void operator()(Compiler* compiler, GenTree* call)
void operator()(Compiler* compiler, GenTree* tree)
{
IL_OFFSET ilOffset = 0;
bool isCandidate = compiler->IsValueHistogramProbeCandidate(tree, &ilOffset);
assert(isCandidate);

ICorJitInfo::PgoInstrumentationSchema schemaElem = {};
schemaElem.Count = 1;
schemaElem.InstrumentationKind = compiler->opts.compCollect64BitCounts
? ICorJitInfo::PgoInstrumentationKind::ValueHistogramLongCount
: ICorJitInfo::PgoInstrumentationKind::ValueHistogramIntCount;
schemaElem.ILOffset = (int32_t)call->AsCall()->gtHandleHistogramProfileCandidateInfo->ilOffset;
schemaElem.ILOffset = static_cast<int32_t>(ilOffset);
m_schema.push_back(schemaElem);
m_schemaCount++;

Expand Down Expand Up @@ -2250,12 +2322,13 @@ class ValueHistogramProbeInserter
return;
}

assert(node->AsCall()->IsSpecialIntrinsic(compiler, NI_System_SpanHelpers_Memmove) ||
node->AsCall()->IsSpecialIntrinsic(compiler, NI_System_SpanHelpers_SequenceEqual));
IL_OFFSET candidateIlOffset = 0;
GenTree** operandUseRef = nullptr;
bool isCandidate = compiler->IsValueHistogramProbeCandidate(node, &candidateIlOffset, &operandUseRef);
assert(isCandidate);

const ICorJitInfo::PgoInstrumentationSchema& countEntry = m_schema[*m_currentSchemaIndex];
if (countEntry.ILOffset !=
static_cast<int32_t>(node->AsCall()->gtHandleHistogramProfileCandidateInfo->ilOffset))
if (countEntry.ILOffset != static_cast<int32_t>(candidateIlOffset))
{
return;
}
Expand All @@ -2275,30 +2348,31 @@ class ValueHistogramProbeInserter

*m_currentSchemaIndex += 2;

GenTree** lenArgRef = &node->AsCall()->gtArgs.GetUserArgByIndex(2)->EarlyNodeRef();

// We have Memmove(dst, src, len) and we want to insert a call to CORINFO_HELP_VALUEPROFILE for the len:
//
// \--* COMMA long
// +--* CALL help void CORINFO_HELP_VALUEPROFILE
// | +--* COMMA long
// | | +--* STORE_LCL_VAR long tmp
// | | | \--* (node to poll)
// | | \--* LCL_VAR long tmp
// | \--* CNS_INT long <hist>
// \--* LCL_VAR long tmp
// Inject CORINFO_HELP_VALUEPROFILE helper call to record the value in the histogram.
// The helper call is injected as a comma node that stores the value to a temp.
//

const unsigned lenTmpNum = compiler->lvaGrabTemp(true DEBUGARG("length histogram profile tmp"));
GenTree* storeLenToTemp = compiler->gtNewTempStore(lenTmpNum, *lenArgRef);
GenTree* lengthLocal = compiler->gtNewLclvNode(lenTmpNum, genActualType(*lenArgRef));
GenTreeOp* lengthNode = compiler->gtNewOperNode(GT_COMMA, lengthLocal->TypeGet(), storeLenToTemp, lengthLocal);
GenTree* histNode = compiler->gtNewIconNode(reinterpret_cast<ssize_t>(hist), TYP_I_IMPL);
unsigned helper = is32 ? CORINFO_HELP_VALUEPROFILE32 : CORINFO_HELP_VALUEPROFILE64;
GenTree* storeLenToTemp = compiler->gtNewTempStore(lenTmpNum, *operandUseRef);
GenTree* lengthLocal = compiler->gtNewLclvNode(lenTmpNum, genActualType(*operandUseRef));
GenTree* lengthNode = compiler->gtNewOperNode(GT_COMMA, lengthLocal->TypeGet(), storeLenToTemp, lengthLocal);
GenTree* histNode = compiler->gtNewIconNode(reinterpret_cast<ssize_t>(hist), TYP_I_IMPL);
unsigned helper = is32 ? CORINFO_HELP_VALUEPROFILE32 : CORINFO_HELP_VALUEPROFILE64;
Comment thread
EgorBo marked this conversation as resolved.

if (!lengthNode->TypeIs(TYP_I_IMPL))
{
// CORINFO_HELP_VALUEPROFILE always expects nint, but the operand here may be
// a 32-bit value (e.g. the int-typed size operand of GT_LCLHEAP, or a TYP_INT
// length on 64-bit). Zero-extend to nint so the recorded histogram values are
// unsigned and not skewed by sign extension; sign-extension can be added later
// for operands where it makes sense.
assert(genActualType(lengthNode) == TYP_INT);
lengthNode = compiler->gtNewCastNode(TYP_I_IMPL, lengthNode, /*fromUnsigned*/ true, TYP_I_IMPL);
}
Comment thread
EgorBo marked this conversation as resolved.

GenTreeCall* helperCallNode = compiler->gtNewHelperCallNode(helper, TYP_VOID, lengthNode, histNode);

*lenArgRef = compiler->gtNewOperNode(GT_COMMA, lengthLocal->TypeGet(), helperCallNode,
compiler->gtCloneExpr(lengthLocal));
*operandUseRef = compiler->gtNewOperNode(GT_COMMA, lengthLocal->TypeGet(), helperCallNode,
compiler->gtCloneExpr(lengthLocal));
m_instrCount++;
}
};
Expand Down
48 changes: 48 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2874,6 +2874,7 @@ bool GenTree::Compare(GenTree* op1, GenTree* op2, bool swapOK)
break;

// For the ones below no extra argument matters for comparison.
case GT_LCLHEAP:
case GT_BOX:
case GT_RUNTIMELOOKUP:
case GT_ARR_ADDR:
Expand Down Expand Up @@ -3447,6 +3448,7 @@ unsigned Compiler::gtHashValue(GenTree* tree)
break;

// For the ones below no extra argument matters for comparison.
case GT_LCLHEAP:
case GT_BOX:
case GT_ARR_ADDR:
break;
Expand Down Expand Up @@ -6919,6 +6921,37 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
costSz = 2 * 2;
break;

case GT_LCLHEAP:
{
    // GT_LCLHEAP is more expensive than a typical unary op (it adjusts
    // SP, may probe the stack, and -- under compInitMem -- zeros the
    // allocated range). Give it a high cost so phases like if-conversion
    // (which has a CostEx > 7 cutoff) won't speculatively evaluate it
    // unconditionally.
    //
    // For constant sizes the zero-init is unrolled by Lowering, so the
    // cost scales with the size; for non-constant sizes (or sizes that
    // would overflow the computation below) we use a fixed upper bound
    // representing the runtime loop.
    costEx = 36;
    costSz = 8;
    if (op1->IsCnsIntOrI() && info.compInitMem)
    {
        const ssize_t size = op1->AsIntCon()->IconValue();

        // Guard against pathological constant sizes. Non-positive values
        // make no sense here, and values within (STACK_ALIGN - 1) of INT_MAX
        // would overflow the align-up below when ssize_t is 32 bits wide
        // (signed overflow is undefined behavior and in practice yields a
        // small/negative cost, defeating the if-conversion cutoff). Such
        // sizes simply keep the fixed cost assigned above.
        if ((size > 0) && (size <= (INT_MAX - (STACK_ALIGN - 1))))
        {
            const ssize_t alignedSize = (size + (STACK_ALIGN - 1)) & ~static_cast<ssize_t>(STACK_ALIGN - 1);
            const int     slotCount   = static_cast<int>(alignedSize / REGSIZE_BYTES);

            costEx = 8 + slotCount; // > 7 to block if-conversion
            costSz = 4 + slotCount;
        }
    }
    break;
}

case GT_ARR_ADDR:
costEx = 0;
costSz = 0;
Expand Down Expand Up @@ -9031,6 +9064,16 @@ GenTreeQmark* Compiler::gtNewQmarkNode(var_types type, GenTree* cond, GenTreeCol
return result;
}

//------------------------------------------------------------------------
// gtNewLclHeapNode: create a GT_LCLHEAP node that carries its IL offset.
//
// Arguments:
//    size     - operand tree of the new node (the localloc size); must not be null
//    ilOffset - IL offset to store on the node
//
// Return Value:
//    The new TYP_I_IMPL GT_LCLHEAP node, flagged GTF_DONT_CSE.
//
GenTreeOpWithILOffset* Compiler::gtNewLclHeapNode(GenTree* size, IL_OFFSET ilOffset)
{
    assert(size != nullptr);

    GenTreeOpWithILOffset* const lclHeap =
        new (this, GT_LCLHEAP) GenTreeOpWithILOffset(GT_LCLHEAP, TYP_I_IMPL, size, /* op2 */ nullptr, ilOffset);

    // Locallocs must never be shared by CSE.
    lclHeap->gtFlags |= GTF_DONT_CSE;

    return lclHeap;
}

GenTreeIntCon* Compiler::gtNewIconNode(ssize_t value, var_types type)
{
assert(genActualType(type) == type);
Expand Down Expand Up @@ -11143,6 +11186,11 @@ GenTree* Compiler::gtCloneExpr(GenTree* tree)
}
break;

case GT_LCLHEAP:
copy = new (this, GT_LCLHEAP) GenTreeOpWithILOffset(GT_LCLHEAP, tree->TypeGet(), tree->gtGetOp1(),
nullptr, tree->AsOpWithILOffset()->GetILOffset());
break;

case GT_SWIFT_ERROR_RET:
copy = new (this, oper) GenTreeOp(oper, tree->TypeGet(), tree->gtGetOp1(), tree->gtGetOp2());
break;
Expand Down
44 changes: 43 additions & 1 deletion src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -3250,7 +3250,6 @@ struct GenTreeOp : public GenTreeUnOp
// Unary operators with optional arguments:
assert(oper == GT_RETURN || oper == GT_RETFILT || OperIsBlk(oper));
}

// returns true if we will use the division by constant optimization for this node.
bool UsesDivideByConstOptimized(Compiler* comp);

Expand Down Expand Up @@ -3316,6 +3315,49 @@ struct GenTreeOp : public GenTreeUnOp
#endif
};

// GenTreeOpWithILOffset - a GenTreeOp extended with an IL offset.
//
// Lets a node remember the IL offset it was created for without relying on
// any debug-info side table. This is what allows non-call value-profile
// probe candidates (e.g. GT_LCLHEAP) to go through the same value-histogram
// instrumentation pipeline as calls, with no auxiliary hash table.
//
struct GenTreeOpWithILOffset : public GenTreeOp
{
    // Builds the underlying GenTreeOp and records 'ilOffset' on the node.
    GenTreeOpWithILOffset(genTreeOps oper,
                          var_types  type,
                          GenTree*   op1,
                          GenTree*   op2,
                          IL_OFFSET  ilOffset DEBUGARG(bool largeNode = false))
        : GenTreeOp(oper, type, op1, op2 DEBUGARG(largeNode))
        , gtILOffset(ilOffset)
    {
    }

#if DEBUGGABLE_GENTREE
    // Default constructor, only available in DEBUGGABLE_GENTREE builds;
    // starts out with BAD_IL_OFFSET.
    GenTreeOpWithILOffset()
        : GenTreeOp()
        , gtILOffset(BAD_IL_OFFSET)
    {
    }
#endif

    // Returns the IL offset stored on this node.
    IL_OFFSET GetILOffset() const
    {
        return gtILOffset;
    }

    // Replaces the IL offset stored on this node.
    void SetILOffset(IL_OFFSET ilOffset)
    {
        gtILOffset = ilOffset;
    }

private:
    IL_OFFSET gtILOffset;
};

struct GenTreeVal : public GenTree
{
size_t gtVal1;
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/gtlist.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ GTNODE(KEEPALIVE , GenTree ,0,0,GTK_UNOP|GTK_NOVALUE) // kee
GTNODE(CAST , GenTreeCast ,0,0,GTK_UNOP|GTK_EXOP) // conversion to another type
GTNODE(BITCAST , GenTreeOp ,0,1,GTK_UNOP) // reinterpretation of bits as another type
GTNODE(CKFINITE , GenTreeOp ,0,1,GTK_UNOP|DBK_NOCONTAIN) // Check for NaN
GTNODE(LCLHEAP , GenTreeOp ,0,1,GTK_UNOP|DBK_NOCONTAIN) // alloca()
GTNODE(LCLHEAP , GenTreeOpWithILOffset, 0,1,GTK_UNOP|GTK_EXOP|DBK_NOCONTAIN) // alloca()

GTNODE(BOUNDS_CHECK , GenTreeBoundsChk ,0,1,GTK_BINOP|GTK_EXOP|GTK_NOVALUE) // a bounds check - for arrays/spans/SIMDs/HWINTRINSICs

Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/gtstructs.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ GTSTRUCT_1(Colon , GT_COLON)
GTSTRUCT_1(FptrVal , GT_FTN_ADDR)
GTSTRUCT_1(Intrinsic , GT_INTRINSIC)
GTSTRUCT_1(IndexAddr , GT_INDEX_ADDR)
GTSTRUCT_1(OpWithILOffset, GT_LCLHEAP)
#if defined(FEATURE_HW_INTRINSICS)
GTSTRUCT_N(MultiOp , GT_HWINTRINSIC)
#endif // FEATURE_HW_INTRINSICS
Expand Down
Loading
Loading