Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
c5cefd3
proof of concept
AndyAyersMS Feb 4, 2025
4d9f1bc
fix flags
AndyAyersMS Feb 4, 2025
e3c4019
enable by default (for now)
AndyAyersMS Feb 4, 2025
23b4261
enable for array allocations in loops too
AndyAyersMS Feb 5, 2025
d096d63
add missing bit of code
AndyAyersMS Feb 5, 2025
032f724
add handler check; fix elem size type
AndyAyersMS Feb 5, 2025
1e088c4
fix zero init logic
AndyAyersMS Feb 6, 2025
d572dec
merge main
AndyAyersMS Feb 20, 2025
526ba2f
update post merge, cleanup a bit
AndyAyersMS Feb 21, 2025
9e7cf8e
pad localloc array size as if it was on heap; handle align8
AndyAyersMS Feb 21, 2025
e247123
don't allow localloc for gc type arrays
AndyAyersMS Feb 22, 2025
f858501
Merge branch 'main' into StackallocLocalloc
AndyAyersMS Feb 22, 2025
f421343
simple runtime check for stack alloc vs heap alloc
AndyAyersMS Feb 21, 2025
45f648e
make new call instead of trying to hack up the old one
AndyAyersMS Feb 23, 2025
66a8ddd
Merge branch 'main' into StackallocLocalloc
AndyAyersMS Feb 24, 2025
39294f4
Merge remote-tracking branch 'origin/StackallocLocalloc' into Stackal…
AndyAyersMS Feb 25, 2025
5540b26
temp fix for linux x64 issue with misaligned frame
AndyAyersMS Feb 25, 2025
d7ac700
Merge branch 'StackallocLocalloc' into StackallocLocallocConditional2
AndyAyersMS Feb 25, 2025
7c7ff33
Merge remote-tracking branch 'upstream/main' into StackallocLocallocC…
AndyAyersMS May 8, 2026
7cc123c
Fix MorphNewArrNodeIntoStackAlloc body to match merged signature
AndyAyersMS May 8, 2026
cf5063e
codegenwasm: honor GTF_LCLHEAP_MUSTINIT in genLclHeap
AndyAyersMS May 8, 2026
ad2add4
Add Compiler::gtMustZeroLocalloc helper and use it at LCLHEAP sites
AndyAyersMS May 8, 2026
62f33f4
Remove unused locals after merge regression
AndyAyersMS May 9, 2026
47f47fe
Fix off-by-one in stack-array localloc size round-up
AndyAyersMS May 9, 2026
f1454bc
Restore variable-length newarr admission for localloc dispatch
AndyAyersMS May 9, 2026
e3fcb61
Keep localloc'd stack arrays only possibly-stack-pointing for GC trac…
AndyAyersMS May 9, 2026
0c3e303
Validate length before stack-array localloc to avoid memory corruption
AndyAyersMS May 9, 2026
0b27c02
Apply jit-format --fix --untidy
AndyAyersMS May 9, 2026
ebd2e51
Use length's actual type for stack-array localloc length check
AndyAyersMS May 9, 2026
5dee83b
Add regression tests for variable-length stack-array localloc
AndyAyersMS May 9, 2026
25128b6
Skip localloc dispatch when newarr result is unused
AndyAyersMS May 9, 2026
cf44e72
Merge branch 'main' into StackallocLocallocConditional2
AndyAyersMS May 10, 2026
739a362
Add per-frame budget for conditional stack allocations
AndyAyersMS May 11, 2026
1993d41
Address PR #127980 review feedback
AndyAyersMS May 11, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,7 @@ class CodeGen final : public CodeGenInterface

//-------------------------------------------------------------------------

bool genLocallocUsed; // true if we have used localloc in the method
bool genUseBlockInit; // true if we plan to block-initialize the local stack frame
unsigned genInitStkLclCnt; // The count of local variables that we need to zero init

Expand Down
6 changes: 4 additions & 2 deletions src/coreclr/jit/codegenarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -391,6 +391,8 @@ void CodeGen::genLclHeap(GenTree* tree)
GenTree* size = tree->AsOp()->gtOp1;
noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));

bool const initMem = m_compiler->gtMustZeroLocalloc(tree);

// Result of localloc will be returned in regCnt.
// Also it used as temporary register in code generation
// for storing allocation size
Expand Down Expand Up @@ -476,7 +478,7 @@ void CodeGen::genLclHeap(GenTree* tree)

goto ALLOC_DONE;
}
else if (!m_compiler->info.compInitMem && (amount < m_compiler->eeGetPageSize())) // must be < not <=
else if (!initMem && (amount < m_compiler->eeGetPageSize())) // must be < not <=
{
// Since the size is less than a page, simply adjust the SP value.
// The SP might already be in the guard page, must touch it BEFORE
Expand All @@ -500,7 +502,7 @@ void CodeGen::genLclHeap(GenTree* tree)
}

// Allocation
if (m_compiler->info.compInitMem)
if (initMem)
{
// At this point 'regCnt' is set to the total number of bytes to localloc.
// Since we have to zero out the allocated memory AND ensure that the stack pointer is always valid
Expand Down
12 changes: 6 additions & 6 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3081,15 +3081,15 @@ void CodeGen::genLclHeap(GenTree* tree)
noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes
noway_assert(genStackLevel == 0); // Can't have anything on the stack

bool needsZeroing = m_compiler->info.compInitMem;
bool initMem = m_compiler->gtMustZeroLocalloc(tree);

// compute the amount of memory to allocate to properly STACK_ALIGN.
size_t amount = 0;
if (size->isContainedIntOrIImmed())
{
// The size node being a contained constant means that Lower has taken care of
// zeroing the memory if compInitMem is true.
needsZeroing = false;
initMem = false;

// If amount is zero then return null in targetReg
amount = size->AsIntCon()->gtIconVal;
Expand All @@ -3113,7 +3113,7 @@ void CodeGen::genLclHeap(GenTree* tree)
// Compute the size of the block to allocate and perform alignment.
// If compInitMem=true, we can reuse targetReg as regcnt,
// since we don't need any internal registers.
if (needsZeroing)
if (initMem)
{
assert(internalRegisters.Count(tree) == 0);
regCnt = targetReg;
Expand Down Expand Up @@ -3200,10 +3200,10 @@ void CodeGen::genLclHeap(GenTree* tree)
}

// else, "mov regCnt, amount"
// If compInitMem=true, we can reuse targetReg as regcnt.
// If initMem=true, we can reuse targetReg as regcnt.
// Since size is a constant, regCnt is not yet initialized.
assert(regCnt == REG_NA);
if (needsZeroing)
if (initMem)
{
assert(internalRegisters.Count(tree) == 0);
regCnt = targetReg;
Expand All @@ -3215,7 +3215,7 @@ void CodeGen::genLclHeap(GenTree* tree)
instGen_Set_Reg_To_Imm(((unsigned int)amount == amount) ? EA_4BYTE : EA_8BYTE, regCnt, amount);
}

if (needsZeroing)
if (initMem)
{
BasicBlock* loop = genCreateTempLabel();

Expand Down
14 changes: 8 additions & 6 deletions src/coreclr/jit/codegenloongarch64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1461,6 +1461,8 @@ void CodeGen::genLclHeap(GenTree* tree)
noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes
noway_assert(genStackLevel == 0); // Can't have anything on the stack

bool const initMem = m_compiler->gtMustZeroLocalloc(tree);

// compute the amount of memory to allocate to properly STACK_ALIGN.
size_t amount = 0;
if (size->IsCnsIntOrI())
Expand All @@ -1487,9 +1489,9 @@ void CodeGen::genLclHeap(GenTree* tree)
emit->emitIns_J_cond_la(INS_beq, endLabel, targetReg, REG_R0);

// Compute the size of the block to allocate and perform alignment.
// If compInitMem=true, we can reuse targetReg as regcnt,
// If initMem=true, we can reuse targetReg as regcnt,
// since we don't need any internal registers.
if (m_compiler->info.compInitMem)
if (initMem)
{
assert(internalRegisters.Count(tree) == 0);
regCnt = targetReg;
Expand Down Expand Up @@ -1541,7 +1543,7 @@ void CodeGen::genLclHeap(GenTree* tree)
static_assert(STACK_ALIGN == (REGSIZE_BYTES * 2));
assert(amount % (REGSIZE_BYTES * 2) == 0); // stp stores two registers at a time
size_t stpCount = amount / (REGSIZE_BYTES * 2);
if (m_compiler->info.compInitMem)
if (initMem)
{
if (stpCount <= 4)
{
Expand Down Expand Up @@ -1588,10 +1590,10 @@ void CodeGen::genLclHeap(GenTree* tree)
}

// else, "mov regCnt, amount"
// If compInitMem=true, we can reuse targetReg as regcnt.
// If initMem=true, we can reuse targetReg as regcnt.
// Since size is a constant, regCnt is not yet initialized.
assert(regCnt == REG_NA);
if (m_compiler->info.compInitMem)
if (initMem)
{
assert(internalRegisters.Count(tree) == 0);
regCnt = targetReg;
Expand All @@ -1603,7 +1605,7 @@ void CodeGen::genLclHeap(GenTree* tree)
instGen_Set_Reg_To_Imm(((unsigned int)amount == amount) ? EA_4BYTE : EA_8BYTE, regCnt, amount);
}

if (m_compiler->info.compInitMem)
if (initMem)
{
// At this point 'regCnt' is set to the total number of bytes to locAlloc.
// Since we have to zero out the allocated memory AND ensure that the stack pointer is always valid
Expand Down
13 changes: 7 additions & 6 deletions src/coreclr/jit/codegenriscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1451,6 +1451,7 @@ void CodeGen::genLclHeap(GenTree* tree)
noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes
noway_assert(genStackLevel == 0); // Can't have anything on the stack

bool const initMem = m_compiler->gtMustZeroLocalloc(tree);
const target_size_t pageSize = m_compiler->eeGetPageSize();

// According to RISC-V Privileged ISA page size is 4KiB
Expand Down Expand Up @@ -1482,9 +1483,9 @@ void CodeGen::genLclHeap(GenTree* tree)
emit->emitIns_J_cond_la(INS_beq, endLabel, targetReg, REG_R0);

// Compute the size of the block to allocate and perform alignment.
// If compInitMem=true, we can reuse targetReg as regcnt,
// If initMem=true, we can reuse targetReg as regcnt,
// since we don't need any internal registers.
if (m_compiler->info.compInitMem)
if (initMem)
{
regCnt = targetReg;
}
Expand Down Expand Up @@ -1536,7 +1537,7 @@ void CodeGen::genLclHeap(GenTree* tree)
static_assert(STACK_ALIGN == (REGSIZE_BYTES * 2));
assert(amount % (REGSIZE_BYTES * 2) == 0); // stp stores two registers at a time
size_t stpCount = amount / (REGSIZE_BYTES * 2);
if (m_compiler->info.compInitMem)
if (initMem)
{
if (stpCount <= 4)
{
Expand Down Expand Up @@ -1585,10 +1586,10 @@ void CodeGen::genLclHeap(GenTree* tree)
}

// else, "mov regCnt, amount"
// If compInitMem=true, we can reuse targetReg as regcnt.
// If initMem=true, we can reuse targetReg as regcnt.
// Since size is a constant, regCnt is not yet initialized.
assert(regCnt == REG_NA);
if (m_compiler->info.compInitMem)
if (initMem)
{
regCnt = targetReg;
}
Expand All @@ -1599,7 +1600,7 @@ void CodeGen::genLclHeap(GenTree* tree)
instGen_Set_Reg_To_Imm(((unsigned int)amount == amount) ? EA_4BYTE : EA_8BYTE, regCnt, amount);
}

if (m_compiler->info.compInitMem)
if (initMem)
{
// At this point 'regCnt' is set to the total number of bytes to locAlloc.
// Since we have to zero out the allocated memory AND ensure that the stack pointer is always valid
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/codegenwasm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2884,7 +2884,7 @@ void CodeGen::genLclHeap(GenTree* tree)
assert(m_compiler->compLocallocUsed);
assert(isFramePointerUsed());

bool const needsZeroing = m_compiler->info.compInitMem;
bool const needsZeroing = m_compiler->gtMustZeroLocalloc(tree);
GenTree* const size = tree->AsOp()->gtOp1;

// We reserve this amount of space below any allocation for
Expand Down
11 changes: 7 additions & 4 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2745,6 +2745,7 @@ void CodeGen::genLclHeap(GenTree* tree)
{
assert(tree->OperIs(GT_LCLHEAP));
assert(m_compiler->compLocallocUsed);
genLocallocUsed = true;

GenTree* size = tree->AsOp()->gtOp1;
noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL));
Expand All @@ -2766,6 +2767,8 @@ void CodeGen::genLclHeap(GenTree* tree)
target_size_t stackAdjustment = 0;
target_size_t locAllocStackOffset = 0;

bool const initMem = m_compiler->gtMustZeroLocalloc(tree);

// compute the amount of memory to allocate to properly STACK_ALIGN.
size_t amount = 0;
if (size->IsCnsIntOrI() && size->isContained())
Expand All @@ -2789,7 +2792,7 @@ void CodeGen::genLclHeap(GenTree* tree)
// Compute the size of the block to allocate and perform alignment.
// If compInitMem=true, we can reuse targetReg as regcnt,
// since we don't need any internal registers.
if (m_compiler->info.compInitMem)
if (initMem)
{
assert(internalRegisters.Count(tree) == 0);
regCnt = targetReg;
Expand All @@ -2814,7 +2817,7 @@ void CodeGen::genLclHeap(GenTree* tree)

inst_RV_IV(INS_add, regCnt, STACK_ALIGN - 1, emitActualTypeSize(type));

if (m_compiler->info.compInitMem)
if (initMem)
{
// Convert the count from a count of bytes to a loop count. We will loop once per
// stack alignment size, so each loop will zero 4 bytes on Windows/x86, and 16 bytes
Expand All @@ -2835,7 +2838,7 @@ void CodeGen::genLclHeap(GenTree* tree)
}

bool initMemOrLargeAlloc; // Declaration must be separate from initialization to avoid clang compiler error.
initMemOrLargeAlloc = m_compiler->info.compInitMem || (amount >= m_compiler->eeGetPageSize()); // must be >= not >
initMemOrLargeAlloc = initMem || (amount >= m_compiler->eeGetPageSize()); // must be >= not >

#if FEATURE_FIXED_OUT_ARGS
// If we have an outgoing arg area then we must adjust the SP by popping off the
Expand Down Expand Up @@ -2909,7 +2912,7 @@ void CodeGen::genLclHeap(GenTree* tree)
// We should not have any temp registers at this point.
assert(internalRegisters.Count(tree) == 0);

if (m_compiler->info.compInitMem)
if (initMem)
{
// At this point 'regCnt' is set to the number of loop iterations for this loop, if each
// iteration zeros (and subtracts from the stack pointer) STACK_ALIGN bytes.
Expand Down
14 changes: 13 additions & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3889,6 +3889,15 @@ class Compiler
bool gtTreeHasLocalRead(GenTree* tree, unsigned lclNum);
bool gtTreeHasLocalStore(GenTree* tree, unsigned lclNum);

// Reports whether the given GT_LCLHEAP node is required to zero its
// allocated block. Zeroing is mandatory when the method was compiled
// with init-mem semantics (info.compInitMem), or when this particular
// node was flagged with GTF_LCLHEAP_MUSTINIT.
bool gtMustZeroLocalloc(GenTree* tree)
{
    assert(tree->OperIs(GT_LCLHEAP));
    if (info.compInitMem)
    {
        return true;
    }
    return (tree->gtFlags & GTF_LCLHEAP_MUSTINIT) != 0;
}

void gtSetStmtInfo(Statement* stmt);

// Returns "true" iff "node" has any of the side effects in "flags".
Expand Down Expand Up @@ -6489,7 +6498,10 @@ class Compiler
bool fgExpandStaticInitForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call);

PhaseStatus fgExpandStackArrayAllocations();
bool fgExpandStackArrayAllocation(BasicBlock* pBlock, Statement* stmt, GenTreeCall* call);
bool fgExpandStackArrayAllocation(BasicBlock* pBlock,
Statement* stmt,
GenTreeCall* call,
unsigned& frameRunningTotalLclNum);

PhaseStatus fgVNBasedIntrinsicExpansion();
bool fgVNBasedIntrinsicExpansionForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call);
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14532,6 +14532,8 @@ const char* Compiler::gtGetWellKnownArgNameForArgMsg(WellKnownArg arg)
return "tail call";
case WellKnownArg::StackArrayLocal:
return "&lcl arr";
case WellKnownArg::StackArrayElemSize:
return "arr elemsz";
case WellKnownArg::RuntimeMethodHandle:
return "meth hnd";
case WellKnownArg::AsyncExecutionContext:
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -538,6 +538,8 @@ enum GenTreeFlags : unsigned

GTF_ALLOCOBJ_EMPTY_STATIC = 0x80000000, // GT_ALLOCOBJ -- allocation site is part of an empty static pattern

GTF_LCLHEAP_MUSTINIT = 0x80000000, // GT_LCLHEAP -- allocation must be zeroed

#ifdef FEATURE_HW_INTRINSICS
GTF_HW_EM_OP = 0x10000000, // GT_HWINTRINSIC -- node is used as an operand to an embedded mask
GTF_HW_USER_CALL = 0x20000000, // GT_HWINTRINSIC -- node is implemented via a user call
Expand Down Expand Up @@ -4775,6 +4777,7 @@ enum class WellKnownArg : unsigned
SwiftSelf,
X86TailCallSpecialArg,
StackArrayLocal,
StackArrayElemSize,
RuntimeMethodHandle,
AsyncExecutionContext,
AsyncSynchronizationContext,
Expand Down
Loading
Loading