From c5cefd3f196cc2b2d0d2189aecad4e35d86b23ac Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 4 Feb 2025 11:13:25 -0800 Subject: [PATCH 01/27] proof of concept --- src/coreclr/jit/codegenarm.cpp | 6 +- src/coreclr/jit/codegenarm64.cpp | 12 ++-- src/coreclr/jit/codegenloongarch64.cpp | 14 ++-- src/coreclr/jit/codegenriscv64.cpp | 13 ++-- src/coreclr/jit/codegenxarch.cpp | 10 +-- src/coreclr/jit/gentree.cpp | 3 + src/coreclr/jit/gentree.h | 3 + src/coreclr/jit/helperexpansion.cpp | 68 ++++++++++++++++-- src/coreclr/jit/jitconfigvalues.h | 1 + src/coreclr/jit/objectalloc.cpp | 95 ++++++++++++++++++++------ src/coreclr/jit/objectalloc.h | 44 +++++++----- 11 files changed, 201 insertions(+), 68 deletions(-) diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index 92d6bc8635224e..fc7cfc33f1c2e2 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -385,6 +385,8 @@ void CodeGen::genLclHeap(GenTree* tree) GenTree* size = tree->AsOp()->gtOp1; noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL)); + bool const initMem = compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT); + // Result of localloc will be returned in regCnt. // Also it used as temporary register in code generation // for storing allocation size @@ -470,7 +472,7 @@ void CodeGen::genLclHeap(GenTree* tree) goto ALLOC_DONE; } - else if (!compiler->info.compInitMem && (amount < compiler->eeGetPageSize())) // must be < not <= + else if (!initMem && (amount < compiler->eeGetPageSize())) // must be < not <= { // Since the size is less than a page, simply adjust the SP value. // The SP might already be in the guard page, must touch it BEFORE @@ -494,7 +496,7 @@ void CodeGen::genLclHeap(GenTree* tree) } // Allocation - if (compiler->info.compInitMem) + if (initMem) { // At this point 'regCnt' is set to the total number of bytes to localloc. 
// Since we have to zero out the allocated memory AND ensure that the stack pointer is always valid diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index ec01f356e194ed..d1de02600551b0 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -3155,6 +3155,8 @@ void CodeGen::genLclHeap(GenTree* tree) noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes noway_assert(genStackLevel == 0); // Can't have anything on the stack + bool const initMem = compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT); + // compute the amount of memory to allocate to properly STACK_ALIGN. size_t amount = 0; if (size->IsCnsIntOrI()) @@ -3184,7 +3186,7 @@ void CodeGen::genLclHeap(GenTree* tree) // Compute the size of the block to allocate and perform alignment. // If compInitMem=true, we can reuse targetReg as regcnt, // since we don't need any internal registers. - if (compiler->info.compInitMem) + if (initMem) { assert(internalRegisters.Count(tree) == 0); regCnt = targetReg; @@ -3232,7 +3234,7 @@ void CodeGen::genLclHeap(GenTree* tree) static_assert_no_msg(STACK_ALIGN == storePairRegsWritesBytes); assert(amount % storePairRegsWritesBytes == 0); // stp stores two registers at a time - if (compiler->info.compInitMem) + if (initMem) { if (amount <= compiler->getUnrollThreshold(Compiler::UnrollKind::Memset)) { @@ -3303,10 +3305,10 @@ void CodeGen::genLclHeap(GenTree* tree) } // else, "mov regCnt, amount" - // If compInitMem=true, we can reuse targetReg as regcnt. + // If initMem=true, we can reuse targetReg as regcnt. // Since size is a constant, regCnt is not yet initialized. assert(regCnt == REG_NA); - if (compiler->info.compInitMem) + if (initMem) { assert(internalRegisters.Count(tree) == 0); regCnt = targetReg; @@ -3318,7 +3320,7 @@ void CodeGen::genLclHeap(GenTree* tree) instGen_Set_Reg_To_Imm(((unsigned int)amount == amount) ? 
EA_4BYTE : EA_8BYTE, regCnt, amount); } - if (compiler->info.compInitMem) + if (initMem) { BasicBlock* loop = genCreateTempLabel(); diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index f8e26e956209aa..51602c0d9e14fa 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -1600,6 +1600,8 @@ void CodeGen::genLclHeap(GenTree* tree) noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes noway_assert(genStackLevel == 0); // Can't have anything on the stack + bool const initMem = compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT); + // compute the amount of memory to allocate to properly STACK_ALIGN. size_t amount = 0; if (size->IsCnsIntOrI()) @@ -1626,9 +1628,9 @@ void CodeGen::genLclHeap(GenTree* tree) emit->emitIns_J_cond_la(INS_beq, endLabel, targetReg, REG_R0); // Compute the size of the block to allocate and perform alignment. - // If compInitMem=true, we can reuse targetReg as regcnt, + // If initMem=true, we can reuse targetReg as regcnt, // since we don't need any internal registers. - if (compiler->info.compInitMem) + if (initMem) { assert(internalRegisters.Count(tree) == 0); regCnt = targetReg; @@ -1680,7 +1682,7 @@ void CodeGen::genLclHeap(GenTree* tree) static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2)); assert(amount % (REGSIZE_BYTES * 2) == 0); // stp stores two registers at a time size_t stpCount = amount / (REGSIZE_BYTES * 2); - if (compiler->info.compInitMem) + if (initMem) { if (stpCount <= 4) { @@ -1727,10 +1729,10 @@ void CodeGen::genLclHeap(GenTree* tree) } // else, "mov regCnt, amount" - // If compInitMem=true, we can reuse targetReg as regcnt. + // If initMem=true, we can reuse targetReg as regcnt. // Since size is a constant, regCnt is not yet initialized. 
assert(regCnt == REG_NA); - if (compiler->info.compInitMem) + if (initMem) { assert(internalRegisters.Count(tree) == 0); regCnt = targetReg; @@ -1742,7 +1744,7 @@ void CodeGen::genLclHeap(GenTree* tree) instGen_Set_Reg_To_Imm(((unsigned int)amount == amount) ? EA_4BYTE : EA_8BYTE, regCnt, amount); } - if (compiler->info.compInitMem) + if (initMem) { // At this point 'regCnt' is set to the total number of bytes to locAlloc. // Since we have to zero out the allocated memory AND ensure that the stack pointer is always valid diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 8efe6e0827125c..b04d138992aca1 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -1508,6 +1508,7 @@ void CodeGen::genLclHeap(GenTree* tree) noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes noway_assert(genStackLevel == 0); // Can't have anything on the stack + bool const initMem = compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT); const target_size_t pageSize = compiler->eeGetPageSize(); // According to RISC-V Privileged ISA page size is 4KiB @@ -1539,9 +1540,9 @@ void CodeGen::genLclHeap(GenTree* tree) emit->emitIns_J_cond_la(INS_beq, endLabel, targetReg, REG_R0); // Compute the size of the block to allocate and perform alignment. - // If compInitMem=true, we can reuse targetReg as regcnt, + // If initMem=true, we can reuse targetReg as regcnt, // since we don't need any internal registers. 
- if (compiler->info.compInitMem) + if (initMem) { regCnt = targetReg; } @@ -1592,7 +1593,7 @@ void CodeGen::genLclHeap(GenTree* tree) static_assert_no_msg(STACK_ALIGN == (REGSIZE_BYTES * 2)); assert(amount % (REGSIZE_BYTES * 2) == 0); // stp stores two registers at a time size_t stpCount = amount / (REGSIZE_BYTES * 2); - if (compiler->info.compInitMem) + if (initMem) { if (stpCount <= 4) { @@ -1641,10 +1642,10 @@ void CodeGen::genLclHeap(GenTree* tree) } // else, "mov regCnt, amount" - // If compInitMem=true, we can reuse targetReg as regcnt. + // If initMem=true, we can reuse targetReg as regcnt. // Since size is a constant, regCnt is not yet initialized. assert(regCnt == REG_NA); - if (compiler->info.compInitMem) + if (initMem) { regCnt = targetReg; } @@ -1655,7 +1656,7 @@ void CodeGen::genLclHeap(GenTree* tree) instGen_Set_Reg_To_Imm(((unsigned int)amount == amount) ? EA_4BYTE : EA_8BYTE, regCnt, amount); } - if (compiler->info.compInitMem) + if (initMem) { // At this point 'regCnt' is set to the total number of bytes to locAlloc. // Since we have to zero out the allocated memory AND ensure that the stack pointer is always valid diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 6cffd104de9814..e0f0a4f65ffe5c 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -2870,6 +2870,8 @@ void CodeGen::genLclHeap(GenTree* tree) target_size_t stackAdjustment = 0; target_size_t locAllocStackOffset = 0; + bool const initMem = compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT); + // compute the amount of memory to allocate to properly STACK_ALIGN. size_t amount = 0; if (size->IsCnsIntOrI() && size->isContained()) @@ -2893,7 +2895,7 @@ void CodeGen::genLclHeap(GenTree* tree) // Compute the size of the block to allocate and perform alignment. // If compInitMem=true, we can reuse targetReg as regcnt, // since we don't need any internal registers. 
- if (compiler->info.compInitMem) + if (initMem) { assert(internalRegisters.Count(tree) == 0); regCnt = targetReg; @@ -2918,7 +2920,7 @@ void CodeGen::genLclHeap(GenTree* tree) inst_RV_IV(INS_add, regCnt, STACK_ALIGN - 1, emitActualTypeSize(type)); - if (compiler->info.compInitMem) + if (initMem) { // Convert the count from a count of bytes to a loop count. We will loop once per // stack alignment size, so each loop will zero 4 bytes on Windows/x86, and 16 bytes @@ -2939,7 +2941,7 @@ void CodeGen::genLclHeap(GenTree* tree) } bool initMemOrLargeAlloc; // Declaration must be separate from initialization to avoid clang compiler error. - initMemOrLargeAlloc = compiler->info.compInitMem || (amount >= compiler->eeGetPageSize()); // must be >= not > + initMemOrLargeAlloc = initMem || (amount >= compiler->eeGetPageSize()); // must be >= not > #if FEATURE_FIXED_OUT_ARGS // If we have an outgoing arg area then we must adjust the SP by popping off the @@ -3013,7 +3015,7 @@ void CodeGen::genLclHeap(GenTree* tree) // We should not have any temp registers at this point. assert(internalRegisters.Count(tree) == 0); - if (compiler->info.compInitMem) + if (initMem) { // At this point 'regCnt' is set to the number of loop iterations for this loop, if each // iteration zeros (and subtracts from the stack pointer) STACK_ALIGN bytes. 
diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index ea02025450b39a..7cbb57bd907429 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -13233,6 +13233,9 @@ const char* Compiler::gtGetWellKnownArgNameForArgMsg(WellKnownArg arg) return "tail call"; case WellKnownArg::StackArrayLocal: return "&lcl arr"; + case WellKnownArg::StackArrayElemSize: + return "arr elemsz"; + default: return nullptr; } diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 641f0b05e1f61a..13ab784d9b66b9 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -550,6 +550,8 @@ enum GenTreeFlags : unsigned int GTF_ALLOCOBJ_EMPTY_STATIC = 0x80000000, // GT_ALLOCOBJ -- allocation site is part of an empty static pattern + GTF_LCLHEAP_MUSTINIT = 0x80000000, // GT_LCLHEAP -- allocation must be zeroed + #ifdef FEATURE_HW_INTRINSICS GTF_HW_EM_OP = 0x10000000, // GT_HWINTRINSIC -- node is used as an operand to an embedded mask GTF_HW_USER_CALL = 0x20000000, // GT_HWINTRINSIC -- node is implemented via a user call @@ -4568,6 +4570,7 @@ enum class WellKnownArg : unsigned SwiftSelf, X86TailCallSpecialArg, StackArrayLocal, + StackArrayElemSize, }; #ifdef DEBUG diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index ff4eb6f8433294..c7f4bb3a7f7c54 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -2832,17 +2832,28 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, return false; } - // If this is a local array, the new helper will have an arg for the array's address + // If this is a local array, the new helper will have an arg for the array's address or an arg + // for the array element size // CallArg* const stackLocalAddressArg = call->gtArgs.FindWellKnownArg(WellKnownArg::StackArrayLocal); + CallArg* const elemSizeArg = call->gtArgs.FindWellKnownArg(WellKnownArg::StackArrayElemSize); - if (stackLocalAddressArg 
== nullptr) + if ((stackLocalAddressArg == nullptr) && (elemSizeArg == nullptr)) { return false; } - JITDUMP("Expanding new array helper for stack allocated array at [%06d] in " FMT_BB ":\n", dspTreeID(call), - block->bbNum); + // If we have an elem size arg, this is intended to be a localloc + // + // Note we may have figured out the array length after we did the + // escape analysis (that is, lengthArg might be a constant), so we + // could change this from a localloc to a fixed alloc, if we + // introduced a new block lcl var. + // + bool const isLocAlloc = (elemSizeArg != nullptr); + + JITDUMP("Expanding new array helper for stack allocated array at [%06d] %sin " FMT_BB ":\n", dspTreeID(call), + isLocAlloc ? " into localloc " : "", block->bbNum); DISPTREE(call); JITDUMP("\n"); @@ -2859,7 +2870,53 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, } } - GenTree* const stackLocalAddress = stackLocalAddressArg->GetNode(); + GenTree* const lengthArg = call->gtArgs.GetArgByIndex(lengthArgIndex)->GetNode(); + GenTree* stackLocalAddress = nullptr; + + // Todo -- clone and leave option to make a helper call under some runtime check + // for sufficient stack. 
+ // + if (isLocAlloc) + { + assert(elemSizeArg != nullptr); + assert(stackLocalAddressArg == nullptr); + GenTree* const elemSize = elemSizeArg->GetNode(); + assert(elemSize->IsCnsIntOrI()); + + unsigned const locallocTemp = lvaGrabTemp(true DEBUGARG("localloc stack address")); + lvaTable[locallocTemp].lvType = TYP_I_IMPL; + + GenTree* const arrayLength = gtCloneExpr(lengthArg); + GenTree* const baseSize = gtNewIconNode(OFFSETOF__CORINFO_Array__data, TYP_I_IMPL); + GenTree* const payloadSize = gtNewOperNode(GT_MUL, TYP_I_IMPL, elemSize, arrayLength); + GenTree* const totalSize = gtNewOperNode(GT_ADD, TYP_I_IMPL, baseSize, payloadSize); + GenTree* const locallocNode = gtNewOperNode(GT_LCLHEAP, TYP_I_IMPL, totalSize); + GenTree* const locallocStore = gtNewStoreLclVarNode(locallocTemp, locallocNode); + Statement* const locallocStmt = fgNewStmtFromTree(locallocStore); + + gtUpdateStmtSideEffects(locallocStmt); + fgInsertStmtBefore(block, stmt, locallocStmt); + + // Array address is the result of the localloc + // + stackLocalAddress = gtNewLclVarNode(locallocTemp); + compLocallocUsed = true; + + // Codegen must zero out the new allocation. + // + locallocNode->gtFlags &= GTF_LCLHEAP_MUSTINIT; + + codeGen->setFramePointerRequired(true); + } + else + { + assert(elemSizeArg == nullptr); + assert(stackLocalAddressArg != nullptr); + + // Array address is the block local we created earlier + // + stackLocalAddress = stackLocalAddressArg->GetNode(); + } // Initialize the array method table pointer. // @@ -2873,7 +2930,6 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, // Initialize the array length. 
// - GenTree* const lengthArg = call->gtArgs.GetArgByIndex(lengthArgIndex)->GetNode(); GenTree* const lengthArgInt = fgOptimizeCast(gtNewCastNode(TYP_INT, lengthArg, false, TYP_INT)); GenTree* const lengthAddress = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(stackLocalAddress), gtNewIconNode(OFFSETOF__CORINFO_Array__length, TYP_I_IMPL)); diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 2d622cc33b7f5b..ba900848a10227 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -674,6 +674,7 @@ RELEASE_CONFIG_INTEGER(JitObjectStackAllocationConditionalEscape, "JitObjectStac CONFIG_STRING(JitObjectStackAllocationConditionalEscapeRange, "JitObjectStackAllocationConditionalEscapeRange") RELEASE_CONFIG_INTEGER(JitObjectStackAllocationArray, "JitObjectStackAllocationArray", 1) RELEASE_CONFIG_INTEGER(JitObjectStackAllocationSize, "JitObjectStackAllocationSize", 528) +RELEASE_CONFIG_INTEGER(JitObjectStackAllocationLocalloc, "JitObjectStackAllocationLocalloc", 0); RELEASE_CONFIG_INTEGER(JitEECallTimingInfo, "JitEECallTimingInfo", 0) diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp index 8c84d47cf75536..0e9dd0a5db3f44 100644 --- a/src/coreclr/jit/objectalloc.cpp +++ b/src/coreclr/jit/objectalloc.cpp @@ -513,8 +513,7 @@ bool ObjectAllocator::MorphAllocObjNodes() case CORINFO_HELP_NEWARR_1_DIRECT: case CORINFO_HELP_NEWARR_1_ALIGN8: { - if ((data->AsCall()->gtArgs.CountUserArgs() == 2) && - data->AsCall()->gtArgs.GetUserArgByIndex(1)->GetNode()->IsCnsIntOrI()) + if (data->AsCall()->gtArgs.CountUserArgs() == 2) { allocType = OAT_NEWARR; } @@ -533,6 +532,7 @@ bool ObjectAllocator::MorphAllocObjNodes() { bool canStack = false; bool bashCall = false; + bool useLocalloc = false; const char* onHeapReason = nullptr; unsigned int lclNum = stmtExpr->AsLclVar()->GetLclNum(); @@ -578,9 +578,9 @@ bool ObjectAllocator::MorphAllocObjNodes() CORINFO_CLASS_HANDLE clsHnd = 
comp->gtGetHelperCallClassHandle(data->AsCall(), &isExact, &isNonNull); GenTree* const len = data->AsCall()->gtArgs.GetUserArgByIndex(1)->GetNode(); - assert(len != nullptr); + ssize_t arraySize = len->IsCnsIntOrI() ? len->AsIntCon()->IconValue() : -1; unsigned int blockSize = 0; comp->Metrics.NewArrayHelperCalls++; @@ -589,27 +589,36 @@ bool ObjectAllocator::MorphAllocObjNodes() onHeapReason = "[array type is either non-exact or null]"; canStack = false; } - else if (!len->IsCnsIntOrI()) + else if (!len->IsCnsIntOrI() && !m_UseLocalloc) { - onHeapReason = "[non-constant size]"; + onHeapReason = "[unknown size]"; canStack = false; } - else if (!CanAllocateLclVarOnStack(lclNum, clsHnd, allocType, len->AsIntCon()->IconValue(), - &blockSize, &onHeapReason)) + else if (!CanAllocateLclVarOnStack(lclNum, clsHnd, allocType, arraySize, &blockSize, + &onHeapReason)) { // reason set by the call canStack = false; } else { - JITDUMP("Allocating V%02u on the stack\n", lclNum); + useLocalloc = !len->IsCnsIntOrI(); + JITDUMP("Allocating V%02u on the stack%s\n", lclNum, + useLocalloc ? " [via localloc]" : " [via block local]"); canStack = true; - const unsigned int stackLclNum = + + if (useLocalloc) + { + MorphNewArrNodeIntoLocAlloc(data->AsCall(), clsHnd, len, block, stmt); + } + else + { MorphNewArrNodeIntoStackAlloc(data->AsCall(), clsHnd, (unsigned int)len->AsIntCon()->IconValue(), blockSize, block, stmt); + } - // Note we do not want to rewrite uses of the array temp, so we + // Note we do not want to rewrite uses of lclNum, so we // do not update m_HeapLocalToStackLocalMap. // comp->Metrics.StackAllocatedArrays++; @@ -679,7 +688,11 @@ bool ObjectAllocator::MorphAllocObjNodes() // We keep the set of possibly-stack-pointing pointers as a superset of the set of // definitely-stack-pointing pointers. All definitely-stack-pointing pointers are in both // sets. 
- MarkLclVarAsDefinitelyStackPointing(lclNum); + + if (!useLocalloc) + { + MarkLclVarAsDefinitelyStackPointing(lclNum); + } MarkLclVarAsPossiblyStackPointing(lclNum); // If this was conditionally escaping enumerator, establish a connection between this local @@ -799,18 +812,15 @@ GenTree* ObjectAllocator::MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* alloc // block - a basic block where newArr is // stmt - a statement where newArr is // -// Return Value: -// local num for the new stack allocated local -// // Notes: // This function can insert additional statements before stmt. // -unsigned int ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall* newArr, - CORINFO_CLASS_HANDLE clsHnd, - unsigned int length, - unsigned int blockSize, - BasicBlock* block, - Statement* stmt) +void ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall* newArr, + CORINFO_CLASS_HANDLE clsHnd, + unsigned int length, + unsigned int blockSize, + BasicBlock* block, + Statement* stmt) { assert(newArr != nullptr); assert(m_AnalysisDone); @@ -873,8 +883,51 @@ unsigned int ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall* // Note that we have stack allocated arrays in this method // comp->setMethodHasStackAllocatedArray(); +} - return lclNum; +//------------------------------------------------------------------------ +// MorphNewArrNodeIntoLocAlloc: Morph a newarray helper call node into a local frame allocation. +// +// Arguments: +// newArr - GT_CALL that will be replaced by helper call. 
+// clsHnd - class representing the type of the array +// length - operand for length of the array +// block - a basic block where newArr is +// stmt - a statement where newArr is +// +void ObjectAllocator::MorphNewArrNodeIntoLocAlloc( + GenTreeCall* newArr, CORINFO_CLASS_HANDLE clsHnd, GenTree* length, BasicBlock* block, Statement* stmt) +{ + assert(newArr != nullptr); + assert(m_AnalysisDone); + assert(clsHnd != NO_CLASS_HANDLE); + assert(newArr->IsHelperCall()); + assert(newArr->GetHelperNum() != CORINFO_HELP_NEWARR_1_MAYBEFROZEN); + + // Get element size + // + CORINFO_CLASS_HANDLE elemClsHnd = NO_CLASS_HANDLE; + CorInfoType corType = comp->info.compCompHnd->getChildType(clsHnd, &elemClsHnd); + var_types type = JITtype2varType(corType); + ClassLayout* elemLayout = type == TYP_STRUCT ? comp->typGetObjLayout(elemClsHnd) : nullptr; + + const unsigned elemSize = elemLayout != nullptr ? elemLayout->GetSize() : genTypeSize(type); + + // Mark the newarr call as being "on stack", and add the element size + // operand for the stack local as an argument + // + GenTree* const elemSizeNode = comp->gtNewIconNode(elemSize); + newArr->gtArgs.PushBack(comp, NewCallArg::Primitive(elemSizeNode).WellKnown(WellKnownArg::StackArrayElemSize)); + newArr->gtCallMoreFlags |= GTF_CALL_M_STACK_ARRAY; + + // Retype the call result as an unmanaged pointer + // + newArr->ChangeType(TYP_I_IMPL); + newArr->gtReturnType = TYP_I_IMPL; + + // Note that we have stack allocated arrays in this method + // + comp->setMethodHasStackAllocatedArray(); } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h index 593e7b7915335c..3175a80f34931a 100644 --- a/src/coreclr/jit/objectalloc.h +++ b/src/coreclr/jit/objectalloc.h @@ -134,6 +134,7 @@ class ObjectAllocator final : public Phase LocalToLocalMap m_HeapLocalToStackLocalMap; BitSetShortLongRep* m_ConnGraphAdjacencyMatrix; unsigned int m_StackAllocMaxSize; + 
bool m_UseLocalloc; // Info for conditionally-escaping locals LocalToLocalMap m_EnumeratorLocalToPseudoLocalMap; @@ -176,12 +177,15 @@ class ObjectAllocator final : public Phase GenTree* MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* allocObj); unsigned int MorphAllocObjNodeIntoStackAlloc( GenTreeAllocObj* allocObj, CORINFO_CLASS_HANDLE clsHnd, bool isValueClass, BasicBlock* block, Statement* stmt); - unsigned int MorphNewArrNodeIntoStackAlloc(GenTreeCall* newArr, - CORINFO_CLASS_HANDLE clsHnd, - unsigned int length, - unsigned int blockSize, - BasicBlock* block, - Statement* stmt); + void MorphNewArrNodeIntoStackAlloc(GenTreeCall* newArr, + CORINFO_CLASS_HANDLE clsHnd, + unsigned int length, + unsigned int blockSize, + BasicBlock* block, + Statement* stmt); + void MorphNewArrNodeIntoLocAlloc( + GenTreeCall* newArr, CORINFO_CLASS_HANDLE clsHnd, GenTree* length, BasicBlock* block, Statement* stmt); + struct BuildConnGraphVisitorCallbackData; bool CanLclVarEscapeViaParentStack(ArrayStack* parentStack, unsigned int lclNum, BasicBlock* block); void UpdateAncestorTypes(GenTree* tree, ArrayStack* parentStack, var_types newType); @@ -284,6 +288,7 @@ inline ObjectAllocator::ObjectAllocator(Compiler* comp) m_ConnGraphAdjacencyMatrix = nullptr; m_StackAllocMaxSize = (unsigned)JitConfig.JitObjectStackAllocationSize(); + m_UseLocalloc = JitConfig.JitObjectStackAllocationLocalloc(); } //------------------------------------------------------------------------ @@ -313,7 +318,7 @@ inline void ObjectAllocator::EnableObjectStackAllocation() // lclNum - Local variable number // clsHnd - Class/struct handle of the variable class // allocType - Type of allocation (newobj or newarr) -// length - Length of the array (for newarr) +// length - Length of the array (for newarr), -1 for runtime determined size // blockSize - [out, optional] exact size of the object // reason - [out, required] if result is false, reason why // preliminaryCheck - if true, allow checking before analysis is 
done @@ -353,7 +358,7 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int lclNu return false; } - if ((length < 0) || (length > CORINFO_Array_MaxLength)) + if ((length < -1) || (length > CORINFO_Array_MaxLength)) { *reason = "[invalid array length]"; return false; @@ -370,19 +375,22 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int lclNu return false; } - const unsigned elemSize = elemLayout != nullptr ? elemLayout->GetSize() : genTypeSize(type); + if (length != -1) + { + const unsigned elemSize = elemLayout != nullptr ? elemLayout->GetSize() : genTypeSize(type); - ClrSafeInt totalSize(elemSize); - totalSize *= static_cast(length); - totalSize += static_cast(OFFSETOF__CORINFO_Array__data); + ClrSafeInt totalSize(elemSize); + totalSize *= static_cast(length); + totalSize += static_cast(OFFSETOF__CORINFO_Array__data); - if (totalSize.IsOverflow()) - { - *reason = "[overflow array length]"; - return false; - } + if (totalSize.IsOverflow()) + { + *reason = "[overflow array length]"; + return false; + } - classSize = totalSize.Value(); + classSize = totalSize.Value(); + } } else if (allocType == OAT_NEWOBJ) { From 4d9f1bcd04164f56396cbd6270fcfaf98d503f3e Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 4 Feb 2025 11:51:00 -0800 Subject: [PATCH 02/27] fix flags --- src/coreclr/jit/helperexpansion.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index c7f4bb3a7f7c54..7af986ec75a1a7 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -2891,6 +2891,11 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, GenTree* const payloadSize = gtNewOperNode(GT_MUL, TYP_I_IMPL, elemSize, arrayLength); GenTree* const totalSize = gtNewOperNode(GT_ADD, TYP_I_IMPL, baseSize, payloadSize); GenTree* const locallocNode = gtNewOperNode(GT_LCLHEAP, TYP_I_IMPL, totalSize); + + // 
Allocation might fail. Codegen must zero the allocation + // + locallocNode->gtFlags |= (GTF_EXCEPT | GTF_LCLHEAP_MUSTINIT); + GenTree* const locallocStore = gtNewStoreLclVarNode(locallocTemp, locallocNode); Statement* const locallocStmt = fgNewStmtFromTree(locallocStore); @@ -2902,10 +2907,8 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, stackLocalAddress = gtNewLclVarNode(locallocTemp); compLocallocUsed = true; - // Codegen must zero out the new allocation. + // We now require a frame pointer // - locallocNode->gtFlags &= GTF_LCLHEAP_MUSTINIT; - codeGen->setFramePointerRequired(true); } else From e3c40198c763d5b0faf5ba99f99771dc659f704c Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 4 Feb 2025 11:51:15 -0800 Subject: [PATCH 03/27] enable by default (for now) --- src/coreclr/jit/jitconfigvalues.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index ba900848a10227..6efa1c70f50884 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -674,7 +674,7 @@ RELEASE_CONFIG_INTEGER(JitObjectStackAllocationConditionalEscape, "JitObjectStac CONFIG_STRING(JitObjectStackAllocationConditionalEscapeRange, "JitObjectStackAllocationConditionalEscapeRange") RELEASE_CONFIG_INTEGER(JitObjectStackAllocationArray, "JitObjectStackAllocationArray", 1) RELEASE_CONFIG_INTEGER(JitObjectStackAllocationSize, "JitObjectStackAllocationSize", 528) -RELEASE_CONFIG_INTEGER(JitObjectStackAllocationLocalloc, "JitObjectStackAllocationLocalloc", 0); +RELEASE_CONFIG_INTEGER(JitObjectStackAllocationLocalloc, "JitObjectStackAllocationLocalloc", 1); RELEASE_CONFIG_INTEGER(JitEECallTimingInfo, "JitEECallTimingInfo", 0) From 23b42611e80d0d5a7e7947462a4ba58d7a917ae8 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 4 Feb 2025 16:16:46 -0800 Subject: [PATCH 04/27] enable for array allocations in loops too --- src/coreclr/jit/helperexpansion.cpp 
| 14 +++++++------- src/coreclr/jit/jitconfigvalues.h | 1 + src/coreclr/jit/objectalloc.cpp | 8 ++++++-- src/coreclr/jit/objectalloc.h | 7 ++++++- 4 files changed, 20 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 7af986ec75a1a7..930b52528a2c66 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -2847,8 +2847,8 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, // // Note we may have figured out the array length after we did the // escape analysis (that is, lengthArg might be a constant), so we - // could change this from a localloc to a fixed alloc, if we - // introduced a new block lcl var. + // could possibly change this from a localloc to a fixed alloc, + // if we could show that was sound. // bool const isLocAlloc = (elemSizeArg != nullptr); @@ -2886,11 +2886,11 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, unsigned const locallocTemp = lvaGrabTemp(true DEBUGARG("localloc stack address")); lvaTable[locallocTemp].lvType = TYP_I_IMPL; - GenTree* const arrayLength = gtCloneExpr(lengthArg); - GenTree* const baseSize = gtNewIconNode(OFFSETOF__CORINFO_Array__data, TYP_I_IMPL); - GenTree* const payloadSize = gtNewOperNode(GT_MUL, TYP_I_IMPL, elemSize, arrayLength); - GenTree* const totalSize = gtNewOperNode(GT_ADD, TYP_I_IMPL, baseSize, payloadSize); - GenTree* const locallocNode = gtNewOperNode(GT_LCLHEAP, TYP_I_IMPL, totalSize); + GenTree* const arrayLength = gtCloneExpr(lengthArg); + GenTree* const baseSize = gtNewIconNode(OFFSETOF__CORINFO_Array__data, TYP_I_IMPL); + GenTree* const payloadSize = gtNewOperNode(GT_MUL, TYP_I_IMPL, elemSize, arrayLength); + GenTree* const totalSize = gtNewOperNode(GT_ADD, TYP_I_IMPL, baseSize, payloadSize); + GenTree* const locallocNode = gtNewOperNode(GT_LCLHEAP, TYP_I_IMPL, totalSize); // Allocation might fail. 
Codegen must zero the allocation // diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 6efa1c70f50884..1fc5d8db2a865e 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -675,6 +675,7 @@ CONFIG_STRING(JitObjectStackAllocationConditionalEscapeRange, "JitObjectStackAll RELEASE_CONFIG_INTEGER(JitObjectStackAllocationArray, "JitObjectStackAllocationArray", 1) RELEASE_CONFIG_INTEGER(JitObjectStackAllocationSize, "JitObjectStackAllocationSize", 528) RELEASE_CONFIG_INTEGER(JitObjectStackAllocationLocalloc, "JitObjectStackAllocationLocalloc", 1); +RELEASE_CONFIG_INTEGER(JitObjectStackAllocationInLoop, "JitObjectStackAllocationInLoop", 1); RELEASE_CONFIG_INTEGER(JitEECallTimingInfo, "JitEECallTimingInfo", 0) diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp index 0e9dd0a5db3f44..a70dbf04d5a4e5 100644 --- a/src/coreclr/jit/objectalloc.cpp +++ b/src/coreclr/jit/objectalloc.cpp @@ -543,7 +543,7 @@ bool ObjectAllocator::MorphAllocObjNodes() onHeapReason = "[object stack allocation disabled]"; canStack = false; } - else if (basicBlockHasBackwardJump) + else if (basicBlockHasBackwardJump && !((allocType == OAT_NEWARR) && m_UseLocallocInLoop)) { onHeapReason = "[alloc in loop]"; canStack = false; @@ -602,7 +602,7 @@ bool ObjectAllocator::MorphAllocObjNodes() } else { - useLocalloc = !len->IsCnsIntOrI(); + useLocalloc = !len->IsCnsIntOrI() || basicBlockHasBackwardJump; JITDUMP("Allocating V%02u on the stack%s\n", lclNum, useLocalloc ? 
" [via localloc]" : " [via block local]"); canStack = true; @@ -928,6 +928,10 @@ void ObjectAllocator::MorphNewArrNodeIntoLocAlloc( // Note that we have stack allocated arrays in this method // comp->setMethodHasStackAllocatedArray(); + + // Notify the compiler; this disables fast tail calls (for now) + // + comp->compLocallocUsed = true; } //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h index 3175a80f34931a..841f29391fd4c6 100644 --- a/src/coreclr/jit/objectalloc.h +++ b/src/coreclr/jit/objectalloc.h @@ -135,6 +135,7 @@ class ObjectAllocator final : public Phase BitSetShortLongRep* m_ConnGraphAdjacencyMatrix; unsigned int m_StackAllocMaxSize; bool m_UseLocalloc; + bool m_UseLocallocInLoop; // Info for conditionally-escaping locals LocalToLocalMap m_EnumeratorLocalToPseudoLocalMap; @@ -288,7 +289,11 @@ inline ObjectAllocator::ObjectAllocator(Compiler* comp) m_ConnGraphAdjacencyMatrix = nullptr; m_StackAllocMaxSize = (unsigned)JitConfig.JitObjectStackAllocationSize(); - m_UseLocalloc = JitConfig.JitObjectStackAllocationLocalloc(); + + // OSR does not support localloc (though seems like late-introduced localloc might be ok) + // + m_UseLocalloc = JitConfig.JitObjectStackAllocationLocalloc() && !comp->opts.IsOSR(); + m_UseLocallocInLoop = m_UseLocalloc && JitConfig.JitObjectStackAllocationInLoop(); } //------------------------------------------------------------------------ From d096d6312d463a0a9d9b69b0383167a87945fe79 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 4 Feb 2025 17:48:15 -0800 Subject: [PATCH 05/27] add missing bit of code --- src/coreclr/jit/morph.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 4c0fc233196482..720394e437c422 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -736,6 +736,8 @@ const char* getWellKnownArgName(WellKnownArg arg) return 
"X86TailCallSpecialArg"; case WellKnownArg::StackArrayLocal: return "StackArrayLocal"; + case WellKnownArg::StackArrayElemSize: + return "StackArrayElemSize"; } return "N/A"; From 032f7242877090b862622cfd78bb86c72bf2283e Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 5 Feb 2025 13:10:02 -0800 Subject: [PATCH 06/27] add handler check; fix elem size type --- src/coreclr/jit/objectalloc.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp index a70dbf04d5a4e5..0cb6398d6b0a3b 100644 --- a/src/coreclr/jit/objectalloc.cpp +++ b/src/coreclr/jit/objectalloc.cpp @@ -483,6 +483,7 @@ bool ObjectAllocator::MorphAllocObjNodes() const bool basicBlockHasNewObj = block->HasFlag(BBF_HAS_NEWOBJ); const bool basicBlockHasNewArr = block->HasFlag(BBF_HAS_NEWARR); const bool basicBlockHasBackwardJump = block->HasFlag(BBF_BACKWARD_JUMP); + const bool basicBlockInHandler = block->hasHndIndex(); if (!basicBlockHasNewObj && !basicBlockHasNewArr) { @@ -594,6 +595,11 @@ bool ObjectAllocator::MorphAllocObjNodes() onHeapReason = "[unknown size]"; canStack = false; } + else if (!len->IsCnsIntOrI() && basicBlockInHandler) + { + onHeapReason = "[unknown size, in handler]"; + canStack = false; + } else if (!CanAllocateLclVarOnStack(lclNum, clsHnd, allocType, arraySize, &blockSize, &onHeapReason)) { @@ -916,7 +922,7 @@ void ObjectAllocator::MorphNewArrNodeIntoLocAlloc( // Mark the newarr call as being "on stack", and add the element size // operand for the stack local as an argument // - GenTree* const elemSizeNode = comp->gtNewIconNode(elemSize); + GenTree* const elemSizeNode = comp->gtNewIconNode(elemSize, TYP_I_IMPL); newArr->gtArgs.PushBack(comp, NewCallArg::Primitive(elemSizeNode).WellKnown(WellKnownArg::StackArrayElemSize)); newArr->gtCallMoreFlags |= GTF_CALL_M_STACK_ARRAY; From 1e088c4f657c90b8ff7ee0dc840fd9346595299a Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 5 Feb 2025 19:06:20 -0800 
Subject: [PATCH 07/27] fix zero init logic --- src/coreclr/jit/helperexpansion.cpp | 2 +- src/coreclr/jit/lsraarm.cpp | 2 +- src/coreclr/jit/lsraarm64.cpp | 4 ++-- src/coreclr/jit/lsraloongarch64.cpp | 4 ++-- src/coreclr/jit/lsrariscv64.cpp | 4 ++-- src/coreclr/jit/lsraxarch.cpp | 5 +++-- 6 files changed, 11 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 930b52528a2c66..57ab0425a603fb 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -2894,7 +2894,7 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, // Allocation might fail. Codegen must zero the allocation // - locallocNode->gtFlags &= (GTF_EXCEPT | GTF_LCLHEAP_MUSTINIT); + locallocNode->gtFlags |= (GTF_EXCEPT | GTF_LCLHEAP_MUSTINIT); GenTree* const locallocStore = gtNewStoreLclVarNode(locallocTemp, locallocNode); Statement* const locallocStmt = fgNewStmtFromTree(locallocStore); diff --git a/src/coreclr/jit/lsraarm.cpp b/src/coreclr/jit/lsraarm.cpp index 815f0149aede11..efa7d1e3a82ed6 100644 --- a/src/coreclr/jit/lsraarm.cpp +++ b/src/coreclr/jit/lsraarm.cpp @@ -68,7 +68,7 @@ int LinearScan::BuildLclHeap(GenTree* tree) { internalIntCount = 0; } - else if (!compiler->info.compInitMem) + else if (!(compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT))) { // No need to initialize allocated stack space. if (sizeVal < compiler->eeGetPageSize()) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 9af6bef2f17f19..c8f1706e1f4ca0 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1183,7 +1183,7 @@ int LinearScan::BuildNode(GenTree* tree) { // Need no internal registers } - else if (!compiler->info.compInitMem) + else if (!(compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT))) { // No need to initialize allocated stack space. 
if (sizeVal < compiler->eeGetPageSize()) @@ -1202,7 +1202,7 @@ int LinearScan::BuildNode(GenTree* tree) else { srcCount = 1; - if (!compiler->info.compInitMem) + if (!(compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT))) { buildInternalIntRegisterDefForNode(tree); buildInternalIntRegisterDefForNode(tree); diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 529e6d8127b670..d0064dc3a9d88d 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -441,7 +441,7 @@ int LinearScan::BuildNode(GenTree* tree) { // Need no internal registers } - else if (!compiler->info.compInitMem) + else if (!(compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT))) { // No need to initialize allocated stack space. if (sizeVal < compiler->eeGetPageSize()) @@ -460,7 +460,7 @@ int LinearScan::BuildNode(GenTree* tree) else { srcCount = 1; - if (!compiler->info.compInitMem) + if (!(compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT))) { buildInternalIntRegisterDefForNode(tree); buildInternalIntRegisterDefForNode(tree); diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp index 1185eac4cea938..ae6b5c76d6ebaa 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -560,7 +560,7 @@ int LinearScan::BuildNode(GenTree* tree) { // Need no internal registers } - else if (!compiler->info.compInitMem) + else if (!(compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT))) { // No need to initialize allocated stack space. 
if (sizeVal < compiler->eeGetPageSize()) @@ -581,7 +581,7 @@ int LinearScan::BuildNode(GenTree* tree) else { srcCount = 1; - if (!compiler->info.compInitMem) + if (!(compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT))) { buildInternalIntRegisterDefForNode(tree); buildInternalIntRegisterDefForNode(tree); diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 98129f9016cc10..eeda2b5fd2b2fd 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1812,14 +1812,15 @@ int LinearScan::BuildLclHeap(GenTree* tree) size_t sizeVal = AlignUp((size_t)size->AsIntCon()->gtIconVal, STACK_ALIGN); // Explicitly zeroed LCLHEAP also needs a regCnt in case of x86 or large page - if ((TARGET_POINTER_SIZE == 4) || (sizeVal >= compiler->eeGetPageSize())) + if ((TARGET_POINTER_SIZE == 4) || (sizeVal >= compiler->eeGetPageSize()) || + (tree->gtFlags & GTF_LCLHEAP_MUSTINIT)) { buildInternalIntRegisterDefForNode(tree); } } else { - if (!compiler->info.compInitMem) + if (!(compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT))) { // For regCnt buildInternalIntRegisterDefForNode(tree); From 526ba2fb3dc8b8b02d661aac1c1b74a550bfda41 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 20 Feb 2025 17:34:10 -0800 Subject: [PATCH 08/27] update post merge, cleanup a bit --- src/coreclr/jit/objectalloc.cpp | 28 +++++++++------------------- src/coreclr/jit/objectalloc.h | 15 +++++++-------- 2 files changed, 16 insertions(+), 27 deletions(-) diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp index ee93645c3e7c67..80ac6874cf8a14 100644 --- a/src/coreclr/jit/objectalloc.cpp +++ b/src/coreclr/jit/objectalloc.cpp @@ -619,9 +619,7 @@ bool ObjectAllocator::MorphAllocObjNodes() } else { - MorphNewArrNodeIntoStackAlloc(data->AsCall(), clsHnd, - (unsigned int)len->AsIntCon()->IconValue(), blockSize, - block, stmt); + MorphNewArrNodeIntoStackAlloc(data->AsCall(), clsHnd, len, block, stmt); } // Note 
we do not want to rewrite uses of lclNum, so we @@ -813,37 +811,29 @@ GenTree* ObjectAllocator::MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* alloc // Arguments: // newArr - GT_CALL that will be replaced by helper call. // clsHnd - class representing the type of the array -// length - length of the array -// blockSize - size of the layout +// len - tree representing length of the array (must be a constant) // block - a basic block where newArr is // stmt - a statement where newArr is // // Notes: // This function can insert additional statements before stmt. // -void ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall* newArr, - CORINFO_CLASS_HANDLE clsHnd, - unsigned int length, - unsigned int blockSize, - BasicBlock* block, - Statement* stmt) +void ObjectAllocator::MorphNewArrNodeIntoStackAlloc( + GenTreeCall* newArr, CORINFO_CLASS_HANDLE clsHnd, GenTree* len, BasicBlock* block, Statement* stmt) { assert(newArr != nullptr); assert(m_AnalysisDone); assert(clsHnd != NO_CLASS_HANDLE); assert(newArr->IsHelperCall()); assert(newArr->GetHelperNum() != CORINFO_HELP_NEWARR_1_MAYBEFROZEN); + assert(len->IsCnsIntOrI()); + const unsigned length = (unsigned int)len->AsIntCon()->IconValue(); const bool shortLifetime = false; const bool alignTo8 = newArr->GetHelperNum() == CORINFO_HELP_NEWARR_1_ALIGN8; const unsigned int lclNum = comp->lvaGrabTemp(shortLifetime DEBUGARG("stack allocated array temp")); LclVarDsc* const lclDsc = comp->lvaGetDesc(lclNum); - if (alignTo8) - { - blockSize = AlignUp(blockSize, 8); - } - comp->lvaSetStruct(lclNum, comp->typGetArrayLayout(clsHnd, length), /* unsafe */ false); lclDsc->lvStackAllocatedObject = true; @@ -926,10 +916,10 @@ void ObjectAllocator::MorphNewArrNodeIntoLocAlloc( newArr->gtArgs.PushBack(comp, NewCallArg::Primitive(elemSizeNode).WellKnown(WellKnownArg::StackArrayElemSize)); newArr->gtCallMoreFlags |= GTF_CALL_M_STACK_ARRAY; - // Retype the call result as an unmanaged pointer + // Retype the call result as a byref (we 
may decide to heap allocate at runtime). // - newArr->ChangeType(TYP_I_IMPL); - newArr->gtReturnType = TYP_I_IMPL; + newArr->ChangeType(TYP_BYREF); + newArr->gtReturnType = TYP_BYREF; // Note that we have stack allocated arrays in this method // diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h index 2aeb9d4df592dc..6190f86b26e662 100644 --- a/src/coreclr/jit/objectalloc.h +++ b/src/coreclr/jit/objectalloc.h @@ -178,12 +178,8 @@ class ObjectAllocator final : public Phase GenTree* MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* allocObj); unsigned int MorphAllocObjNodeIntoStackAlloc( GenTreeAllocObj* allocObj, CORINFO_CLASS_HANDLE clsHnd, bool isValueClass, BasicBlock* block, Statement* stmt); - void MorphNewArrNodeIntoStackAlloc(GenTreeCall* newArr, - CORINFO_CLASS_HANDLE clsHnd, - unsigned int length, - unsigned int blockSize, - BasicBlock* block, - Statement* stmt); + void MorphNewArrNodeIntoStackAlloc( + GenTreeCall* newArr, CORINFO_CLASS_HANDLE clsHnd, GenTree* length, BasicBlock* block, Statement* stmt); void MorphNewArrNodeIntoLocAlloc( GenTreeCall* newArr, CORINFO_CLASS_HANDLE clsHnd, GenTree* length, BasicBlock* block, Statement* stmt); @@ -369,8 +365,11 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int lclNu return false; } - ClassLayout* const layout = comp->typGetArrayLayout(clsHnd, (unsigned)length); - classSize = layout->GetSize(); + if (length != -1) + { + ClassLayout* const layout = comp->typGetArrayLayout(clsHnd, (unsigned)length); + classSize = layout->GetSize(); + } } else if (allocType == OAT_NEWOBJ) { From 9e7cf8ee2e807a7e8b02dfe19cd4ff043d855773 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 20 Feb 2025 17:34:46 -0800 Subject: [PATCH 09/27] pad localloc array size as if it was on heap; handle align8 --- src/coreclr/jit/helperexpansion.cpp | 52 ++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp 
b/src/coreclr/jit/helperexpansion.cpp index 5046fc3dc98a42..dbfc7c0e632339 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -2857,6 +2857,7 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, // if we could show that was sound. // bool const isLocAlloc = (elemSizeArg != nullptr); + bool const isAlign8 = isLocAlloc && (helper == CORINFO_HELP_NEWARR_1_ALIGN8); JITDUMP("Expanding new array helper for stack allocated array at [%06d] %sin " FMT_BB ":\n", dspTreeID(call), isLocAlloc ? " into localloc " : "", block->bbNum); @@ -2892,10 +2893,38 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, unsigned const locallocTemp = lvaGrabTemp(true DEBUGARG("localloc stack address")); lvaTable[locallocTemp].lvType = TYP_I_IMPL; - GenTree* const arrayLength = gtCloneExpr(lengthArg); - GenTree* const baseSize = gtNewIconNode(OFFSETOF__CORINFO_Array__data, TYP_I_IMPL); - GenTree* const payloadSize = gtNewOperNode(GT_MUL, TYP_I_IMPL, elemSize, arrayLength); - GenTree* const totalSize = gtNewOperNode(GT_ADD, TYP_I_IMPL, baseSize, payloadSize); + GenTree* const arrayLength = gtCloneExpr(lengthArg); + GenTree* const baseSize = gtNewIconNode(OFFSETOF__CORINFO_Array__data, TYP_I_IMPL); + GenTree* const payloadSize = gtNewOperNode(GT_MUL, TYP_I_IMPL, elemSize, arrayLength); + GenTree* totalSize = gtNewOperNode(GT_ADD, TYP_I_IMPL, baseSize, payloadSize); + + unsigned const elemSizeValue = (unsigned)elemSize->AsIntCon()->IconValue(); + + if ((elemSizeValue % TARGET_POINTER_SIZE) != 0) + { + // Round size up to TARGET_POINTER_SIZE. 
+ // size = (size + TPS) & ~(TPS-1) + // + GenTree* const roundSize = gtNewIconNode(TARGET_POINTER_SIZE, TYP_I_IMPL); + GenTree* const biasedSize = gtNewOperNode(GT_ADD, TYP_I_IMPL, totalSize, roundSize); + GenTree* const mask = gtNewIconNode(TARGET_POINTER_SIZE - 1, TYP_I_IMPL); + GenTree* const invMask = gtNewOperNode(GT_NOT, TYP_I_IMPL, mask); + GenTree* const paddedSize = gtNewOperNode(GT_AND, TYP_I_IMPL, biasedSize, invMask); + + totalSize = paddedSize; + } + +#ifndef TARGET_64BIT + if (isAlign8) + { + // For Align8, allocate an extra TARGET_POINTER_SIZED (4) bytes so + // we can fix alignment below. + // + GenTree* const alignSize = gtNewIconNode(4, TYP_I_IMPL); + totalSize = gtNewOperNode(GT_ADD, TYP_I_IMPL, totalSize, alignSize); + } +#endif + GenTree* const locallocNode = gtNewOperNode(GT_LCLHEAP, TYP_I_IMPL, totalSize); // Allocation might fail. Codegen must zero the allocation @@ -2913,6 +2942,21 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, stackLocalAddress = gtNewLclVarNode(locallocTemp); compLocallocUsed = true; +#ifndef TARGET_64BIT + if (isAlign8) + { + // For Align8, adjust address to be suitably aligned. 
+ // Addr = (Localloc + 4) & ~7; + // + GenTree* const alignSize = gtNewIconNode(4, TYP_I_IMPL); + GenTree* const biasedAddress = gtNewOperNode(GT_ADD, TYP_I_IMPL, stackLocalAddress, alignSize); + GenTree* const alignMaskInv = gtNewIconNode(-8, TYP_I_IMPL); + GenTree* const alignedAddress = gtNewOperNode(GT_AND, TYP_I_IMPL, biasedAddress, alignMaskInv); + + stackLocalAddress = alignedAddress; + } +#endif + // We now require a frame pointer // codeGen->setFramePointerRequired(true); From e24712353a7444dc741a64d784880e9d51e58c94 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 22 Feb 2025 08:41:28 -0800 Subject: [PATCH 10/27] don't allow localloc for gc type arrays --- src/coreclr/jit/jitmetadatalist.h | 1 + src/coreclr/jit/objectalloc.cpp | 20 ++++++++++---------- src/coreclr/jit/objectalloc.h | 19 ++++++++++++++----- 3 files changed, 25 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/jitmetadatalist.h b/src/coreclr/jit/jitmetadatalist.h index 215ea82ead5606..6c03726f968529 100644 --- a/src/coreclr/jit/jitmetadatalist.h +++ b/src/coreclr/jit/jitmetadatalist.h @@ -89,6 +89,7 @@ JITMETADATAMETRIC(NewBoxedValueClassHelperCalls, int, 0) JITMETADATAMETRIC(StackAllocatedBoxedValueClasses, int, 0) JITMETADATAMETRIC(NewArrayHelperCalls, int, 0) JITMETADATAMETRIC(StackAllocatedArrays, int, 0) +JITMETADATAMETRIC(LocallocAllocatedArrays, int, 0) JITMETADATAMETRIC(LocalAssertionCount, int, 0) JITMETADATAMETRIC(LocalAssertionOverflow, int, 0) JITMETADATAMETRIC(MorphTrackedLocals, int, 0) diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp index 80ac6874cf8a14..6c973fe1ce37a8 100644 --- a/src/coreclr/jit/objectalloc.cpp +++ b/src/coreclr/jit/objectalloc.cpp @@ -581,7 +581,7 @@ bool ObjectAllocator::MorphAllocObjNodes() GenTree* const len = data->AsCall()->gtArgs.GetUserArgByIndex(1)->GetNode(); assert(len != nullptr); - ssize_t arraySize = len->IsCnsIntOrI() ? len->AsIntCon()->IconValue() : -1; + ssize_t arraySize = len->IsCnsIntOrI() ? 
len->AsIntCon()->IconValue() : 1; unsigned int blockSize = 0; comp->Metrics.NewArrayHelperCalls++; @@ -600,8 +600,8 @@ bool ObjectAllocator::MorphAllocObjNodes() onHeapReason = "[unknown size, in handler]"; canStack = false; } - else if (!CanAllocateLclVarOnStack(lclNum, clsHnd, allocType, arraySize, &blockSize, - &onHeapReason)) + else if (!CanAllocateLclVarOnStack(lclNum, clsHnd, allocType, arraySize, len->IsCnsIntOrI(), + &blockSize, &onHeapReason)) { // reason set by the call canStack = false; @@ -616,16 +616,13 @@ bool ObjectAllocator::MorphAllocObjNodes() if (useLocalloc) { MorphNewArrNodeIntoLocAlloc(data->AsCall(), clsHnd, len, block, stmt); + comp->Metrics.LocallocAllocatedArrays++; } else { MorphNewArrNodeIntoStackAlloc(data->AsCall(), clsHnd, len, block, stmt); + comp->Metrics.StackAllocatedArrays++; } - - // Note we do not want to rewrite uses of lclNum, so we - // do not update m_HeapLocalToStackLocalMap. - // - comp->Metrics.StackAllocatedArrays++; } } else if (allocType == OAT_NEWOBJ) @@ -653,7 +650,8 @@ bool ObjectAllocator::MorphAllocObjNodes() comp->Metrics.NewRefClassHelperCalls++; } - if (!CanAllocateLclVarOnStack(lclNum, clsHnd, allocType, 0, nullptr, &onHeapReason)) + if (!CanAllocateLclVarOnStack(lclNum, clsHnd, allocType, /* length */ 0, /* lengthKnown */ true, + nullptr, &onHeapReason)) { // reason set by the call canStack = false; @@ -1333,6 +1331,7 @@ void ObjectAllocator::UpdateAncestorTypes(GenTree* tree, ArrayStack* p case GT_IND: case GT_CALL: + // Watch for helper calls that have retyped operands...? 
break; default: @@ -2026,7 +2025,8 @@ void ObjectAllocator::CheckForGuardedAllocationOrCopy(BasicBlock* block, const char* reason = nullptr; unsigned size = 0; unsigned length = TARGET_POINTER_SIZE; - if (CanAllocateLclVarOnStack(enumeratorLocal, clsHnd, OAT_NEWOBJ, length, &size, &reason, + if (CanAllocateLclVarOnStack(enumeratorLocal, clsHnd, OAT_NEWOBJ, length, /* length known */ true, + &size, &reason, /* preliminaryCheck */ true)) { // We are going to conditionally track accesses to the enumerator local via a pseudo local. diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h index 6190f86b26e662..beef00276d9418 100644 --- a/src/coreclr/jit/objectalloc.h +++ b/src/coreclr/jit/objectalloc.h @@ -154,6 +154,7 @@ class ObjectAllocator final : public Phase CORINFO_CLASS_HANDLE clsHnd, ObjectAllocationType allocType, ssize_t length, + bool lengthKnown, unsigned int* blockSize, const char** reason, bool preliminaryCheck = false); @@ -319,7 +320,8 @@ inline void ObjectAllocator::EnableObjectStackAllocation() // lclNum - Local variable number // clsHnd - Class/struct handle of the variable class // allocType - Type of allocation (newobj or newarr) -// length - Length of the array (for newarr), -1 for runtime determined size +// length - Length of the array (for newarr), 1 for runtime determined size +// lengthKnown - true if length is known // blockSize - [out, optional] exact size of the object // reason - [out, required] if result is false, reason why // preliminaryCheck - if true, allow checking before analysis is done @@ -332,6 +334,7 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int lclNu CORINFO_CLASS_HANDLE clsHnd, ObjectAllocationType allocType, ssize_t length, + bool lengthKnown, unsigned int* blockSize, const char** reason, bool preliminaryCheck) @@ -359,16 +362,22 @@ inline bool ObjectAllocator::CanAllocateLclVarOnStack(unsigned int lclNu return false; } - if ((length < -1) || (length > CORINFO_Array_MaxLength)) + if 
((length < 0) || (length > CORINFO_Array_MaxLength)) { *reason = "[invalid array length]"; return false; } - if (length != -1) + ClassLayout* const layout = comp->typGetArrayLayout(clsHnd, (unsigned)length); + classSize = layout->GetSize(); + + if (!lengthKnown && layout->HasGCPtr()) { - ClassLayout* const layout = comp->typGetArrayLayout(clsHnd, (unsigned)length); - classSize = layout->GetSize(); + // We can't represent GC info for these yet + // + assert(length == 1); + *reason = "[unknown length, gc elements]"; + return false; } } else if (allocType == OAT_NEWOBJ) From f4213430342b41ff74622c9ddfd82372800a52fd Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 21 Feb 2025 09:31:06 -0800 Subject: [PATCH 11/27] simple runtime check for stack alloc vs heap alloc --- src/coreclr/jit/helperexpansion.cpp | 130 +++++++++++++++++++++++++--- 1 file changed, 116 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index dbfc7c0e632339..7538d7a41115b2 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -2813,6 +2813,14 @@ PhaseStatus Compiler::fgExpandStackArrayAllocations() // Returns: // true if a runtime lookup was found and expanded. // +// Remarks: +// For arrays whose size was large or not known during stack allocation analysis, +// the allocation expands into a runtime check followed by localloc (if small) +// or heapalloc (if big). +// +// For known sized arrays, we assume upstream analysis has limited size to +// something reasonable, and the allocation is into fixed local storage. 
+// bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, GenTreeCall* call) { if (!call->IsHelperCall()) @@ -2849,7 +2857,22 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, return false; } - // If we have an elem size arg, this is intended to be a localloc + // Remove these args since we may leave the call in the IR as a normal helper. + // (or just make a new call?) + // + call->gtArgs.ResetFinalArgsAndABIInfo(); + if (stackLocalAddressArg != nullptr) + { + call->gtArgs.Remove(stackLocalAddressArg); + } + if (elemSizeArg != nullptr) + { + call->gtArgs.Remove(elemSizeArg); + } + call->gtArgs.ArgsComplete(this, call); + call->gtArgs.AddFinalArgsAndDetermineABIInfo(this, call); + + // If we have an elem size arg, this is intended to be a localloc/heapalloc // // Note we may have figured out the array length after we did the // escape analysis (that is, lengthArg might be a constant), so we @@ -2880,8 +2903,8 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, GenTree* const lengthArg = call->gtArgs.GetArgByIndex(lengthArgIndex)->GetNode(); GenTree* stackLocalAddress = nullptr; - // Todo -- clone and leave option to make a helper call under some runtime check - // for sufficient stack. + // If we have a localloc, compute (at runtime) overall size, and check length + // against a threshold. If over, heap allocate. 
// if (isLocAlloc) { @@ -2925,7 +2948,75 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, } #endif - GenTree* const locallocNode = gtNewOperNode(GT_LCLHEAP, TYP_I_IMPL, totalSize); + // We will need total size twice, so spill it to a local + // + unsigned const totalSizeTemp = lvaGrabTemp(false DEBUGARG("lcl/heap alloc size")); + lvaTable[totalSizeTemp].lvType = TYP_I_IMPL; + GenTree* const totalSizeStore = gtNewStoreLclVarNode(totalSizeTemp, totalSize); + + Statement* const totalSizeStmt = fgNewStmtFromTree(totalSizeStore); + gtUpdateStmtSideEffects(totalSizeStmt); + fgInsertStmtBefore(block, stmt, totalSizeStmt); + + // Check the length against our runtime threshold. For now we just check against + // the fixed length limit (528 bytes). + // + GenTree* const totalSizeForCheck = gtNewLclVarNode(totalSizeTemp); + GenTree* const runtimeSizeLimit = gtNewIconNode((unsigned)JitConfig.JitObjectStackAllocationSize(), TYP_I_IMPL); + GenTree* const runtimeSizeCompare = gtNewOperNode(GT_GT, TYP_INT, totalSizeForCheck, runtimeSizeLimit); + GenTree* const runtimeSizeCheck = gtNewOperNode(GT_JTRUE, TYP_VOID, runtimeSizeCompare); + + Statement* const runtimeSizeCheckStmt = fgNewStmtFromTree(runtimeSizeCheck); + gtUpdateStmtSideEffects(runtimeSizeCheckStmt); + fgInsertStmtBefore(block, stmt, runtimeSizeCheckStmt); + + // Split block after the call, and insert blocks for the localloc and the heap alloc + // + BasicBlock* const remainderBlock = fgSplitBlockAfterStatement(block, stmt); + BasicBlock* const locallocBlock = fgNewBBafter(BBJ_ALWAYS, block, /* extendRegion */ true); + BasicBlock* const heapallocBlock = fgNewBBafter(BBJ_ALWAYS, locallocBlock, /* extendRegion */ true); + + // Wire up new flow.... 
assume (for now) localloc is more likely + // + FlowEdge* const blockRemainderEdge = fgGetPredForBlock(remainderBlock, block); + fgRemoveRefPred(blockRemainderEdge); + + FlowEdge* const locallocInEdge = fgAddRefPred(locallocBlock, block); + FlowEdge* const locallocOutEdge = fgAddRefPred(remainderBlock, locallocBlock); + + locallocInEdge->setLikelihood(0.8); + locallocBlock->inheritWeightPercentage(block, 80); + locallocOutEdge->setLikelihood(1.0); + locallocBlock->SetTargetEdge(locallocOutEdge); + + FlowEdge* const heapallocInEdge = fgAddRefPred(heapallocBlock, block); + FlowEdge* const heapallocOutEdge = fgAddRefPred(remainderBlock, heapallocBlock); + + heapallocInEdge->setLikelihood(0.2); + heapallocBlock->inheritWeightPercentage(block, 20); + heapallocOutEdge->setLikelihood(1.0); + heapallocBlock->SetTargetEdge(heapallocOutEdge); + + block->SetCond(heapallocInEdge, locallocInEdge); + + // Now fill in the heapalloc block. + // We expect *callUse's user to be a local store. + // + assert((*callUse)->gtNext->OperIs(GT_STORE_LCL_VAR)); + unsigned const useLclNum = (*callUse)->gtNext->AsLclVarCommon()->GetLclNum(); + GenTree* const heapAllocStore = gtNewStoreLclVarNode(useLclNum, call); + Statement* const heapAllocStmt = fgNewStmtFromTree(heapAllocStore); + + gtUpdateStmtSideEffects(heapAllocStmt); + fgInsertStmtAtBeg(heapallocBlock, heapAllocStmt); + + // Fill in the first part of the localloc block + // + fgUnlinkStmt(block, stmt); + fgInsertStmtAtBeg(locallocBlock, stmt); + + GenTree* const totalSizeForAlloc = gtNewLclVarNode(totalSizeTemp); + GenTree* const locallocNode = gtNewOperNode(GT_LCLHEAP, TYP_I_IMPL, totalSizeForAlloc); // Allocation might fail. 
Codegen must zero the allocation // @@ -2935,7 +3026,7 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, Statement* const locallocStmt = fgNewStmtFromTree(locallocStore); gtUpdateStmtSideEffects(locallocStmt); - fgInsertStmtBefore(block, stmt, locallocStmt); + fgInsertStmtBefore(locallocBlock, stmt, locallocStmt); // Array address is the result of the localloc // @@ -2960,6 +3051,11 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, // We now require a frame pointer // codeGen->setFramePointerRequired(true); + + // Update block so code below finishes initializing the localloc array + // in the localloc block. + // + block = locallocBlock; } else { @@ -2973,26 +3069,32 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, // Initialize the array method table pointer. // - GenTree* const mt = call->gtArgs.GetArgByIndex(typeArgIndex)->GetNode(); - GenTree* const mtStore = gtNewStoreValueNode(TYP_I_IMPL, stackLocalAddress, mt); - Statement* const mtStmt = fgNewStmtFromTree(mtStore); + GenTree* const mt = call->gtArgs.GetArgByIndex(typeArgIndex)->GetNode(); + GenTree* const mtToStore = isLocAlloc ? gtCloneExpr(mt) : mt; + GenTree* const mtStore = gtNewStoreValueNode(TYP_I_IMPL, stackLocalAddress, mtToStore); + Statement* const mtStmt = fgNewStmtFromTree(mtStore); fgInsertStmtBefore(block, stmt, mtStmt); // Initialize the array length. // - GenTree* const lengthArgInt = fgOptimizeCast(gtNewCastNode(TYP_INT, lengthArg, false, TYP_INT)); - GenTree* const lengthAddress = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(stackLocalAddress), - gtNewIconNode(OFFSETOF__CORINFO_Array__length, TYP_I_IMPL)); - GenTree* const lengthStore = gtNewStoreValueNode(TYP_INT, lengthAddress, lengthArgInt); - Statement* const lenStmt = fgNewStmtFromTree(lengthStore); + GenTree* const arrayLengthToStore = isLocAlloc ? 
gtCloneExpr(lengthArg) : lengthArg; + GenTree* const lengthArgInt = fgOptimizeCast(gtNewCastNode(TYP_INT, arrayLengthToStore, false, TYP_INT)); + GenTree* const lengthAddress = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(stackLocalAddress), + gtNewIconNode(OFFSETOF__CORINFO_Array__length, TYP_I_IMPL)); + GenTree* const lengthStore = gtNewStoreValueNode(TYP_INT, lengthAddress, lengthArgInt); + Statement* const lenStmt = fgNewStmtFromTree(lengthStore); fgInsertStmtBefore(block, stmt, lenStmt); // Replace call with local address // *callUse = gtCloneExpr(stackLocalAddress); - DEBUG_DESTROY_NODE(call); + + if (!isLocAlloc) + { + DEBUG_DESTROY_NODE(call); + } fgMorphStmtBlockOps(block, stmt); gtUpdateStmtSideEffects(stmt); From 45f648e35579dbd59037cbbf25381587a68d2a29 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 22 Feb 2025 17:14:51 -0800 Subject: [PATCH 12/27] make new call instead of trying to hack up the old one --- src/coreclr/jit/helperexpansion.cpp | 36 ++++++++++++----------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 7538d7a41115b2..cc7a5a189e9260 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -2857,21 +2857,6 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, return false; } - // Remove these args since we may leave the call in the IR as a normal helper. - // (or just make a new call?) 
- // - call->gtArgs.ResetFinalArgsAndABIInfo(); - if (stackLocalAddressArg != nullptr) - { - call->gtArgs.Remove(stackLocalAddressArg); - } - if (elemSizeArg != nullptr) - { - call->gtArgs.Remove(elemSizeArg); - } - call->gtArgs.ArgsComplete(this, call); - call->gtArgs.AddFinalArgsAndDetermineABIInfo(this, call); - // If we have an elem size arg, this is intended to be a localloc/heapalloc // // Note we may have figured out the array length after we did the @@ -3000,11 +2985,24 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, block->SetCond(heapallocInEdge, locallocInEdge); // Now fill in the heapalloc block. + // + // Create a helper call just like call, but without the extra arguments + // + GenTreeCall* newCall = gtNewCallNode(CT_HELPER, call->gtCallMethHnd, call->TypeGet()); + + newCall->gtArgs.PushBack(this, NewCallArg::Primitive(call->gtArgs.GetArgByIndex(typeArgIndex)->GetNode())); + newCall->gtArgs.PushBack(this, NewCallArg::Primitive(call->gtArgs.GetArgByIndex(lengthArgIndex)->GetNode())); + newCall->gtFlags = call->gtFlags; +#if defined(FEATURE_READYTORUN) + newCall->setEntryPoint(call->gtEntryPoint); +#endif // FEATURE_READYTORUN + newCall = fgMorphArgs(newCall); + // We expect *callUse's user to be a local store. 
// assert((*callUse)->gtNext->OperIs(GT_STORE_LCL_VAR)); unsigned const useLclNum = (*callUse)->gtNext->AsLclVarCommon()->GetLclNum(); - GenTree* const heapAllocStore = gtNewStoreLclVarNode(useLclNum, call); + GenTree* const heapAllocStore = gtNewStoreLclVarNode(useLclNum, newCall); Statement* const heapAllocStmt = fgNewStmtFromTree(heapAllocStore); gtUpdateStmtSideEffects(heapAllocStmt); @@ -3090,11 +3088,7 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, // Replace call with local address // *callUse = gtCloneExpr(stackLocalAddress); - - if (!isLocAlloc) - { - DEBUG_DESTROY_NODE(call); - } + DEBUG_DESTROY_NODE(call); fgMorphStmtBlockOps(block, stmt); gtUpdateStmtSideEffects(stmt); From 5540b26d1ecc2ba6c9e94534981ac7bad48f3548 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 25 Feb 2025 14:44:42 -0800 Subject: [PATCH 13/27] temp fix for linux x64 issue with misaligned frame --- src/coreclr/jit/codegen.h | 1 + src/coreclr/jit/codegenxarch.cpp | 1 + src/coreclr/jit/objectalloc.cpp | 7 +++++++ 3 files changed, 9 insertions(+) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index b26c93534b2f9d..01c2a96608842a 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -125,6 +125,7 @@ class CodeGen final : public CodeGenInterface //------------------------------------------------------------------------- + bool genLocallocUsed; // true if we have used localloc in the method bool genUseBlockInit; // true if we plan to block-initialize the local stack frame unsigned genInitStkLclCnt; // The count of local variables that we need to zero init diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 487052f1d976d9..3f5ad78613a24c 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -2850,6 +2850,7 @@ void CodeGen::genLclHeap(GenTree* tree) { assert(tree->OperGet() == GT_LCLHEAP); assert(compiler->compLocallocUsed); + genLocallocUsed = 
true; GenTree* size = tree->AsOp()->gtOp1; noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL)); diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp index 6c973fe1ce37a8..3b00fca94566d7 100644 --- a/src/coreclr/jit/objectalloc.cpp +++ b/src/coreclr/jit/objectalloc.cpp @@ -926,6 +926,13 @@ void ObjectAllocator::MorphNewArrNodeIntoLocAlloc( // Notify the compiler; this disables fast tail calls (for now) // comp->compLocallocUsed = true; + +#ifdef UNIX_AMD64_ABI + // Ensure we don't end up with misaligned frames, + // if we manage to dead code this newarr. + // + comp->opts.compNeedToAlignFrame = true; +#endif } //------------------------------------------------------------------------ From 7cc123cad86b4b9a1c03ebb08ec18e38a69f269d Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 8 May 2026 16:00:18 -0700 Subject: [PATCH 14/27] Fix MorphNewArrNodeIntoStackAlloc body to match merged signature The merge of upstream/main into StackallocLocallocConditional2 left the body of MorphNewArrNodeIntoStackAlloc in HEAD's old form (taking GenTree* len) while the header was updated to main's new signature (unsigned length, unsigned blockSize, returning unsigned int). Replace the body's signature and preamble with main's version; the rest of the function (including the return lclNum already added during the merge) is unchanged. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/jit/objectalloc.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp index 2a39cc7cb52cc9..ed3adb0a829fbc 100644 --- a/src/coreclr/jit/objectalloc.cpp +++ b/src/coreclr/jit/objectalloc.cpp @@ -1729,24 +1729,30 @@ GenTree* ObjectAllocator::MorphAllocObjNodeIntoHelperCall(GenTreeAllocObj* alloc // Arguments: // newArr - GT_CALL that will be replaced by helper call. 
// clsHnd - class representing the type of the array -// len - tree representing length of the array (must be a constant) +// length - length of the array +// blockSize - size of the layout // block - a basic block where newArr is // stmt - a statement where newArr is // +// Return Value: +// local num for the new stack allocated local +// // Notes: // This function can insert additional statements before stmt. // -void ObjectAllocator::MorphNewArrNodeIntoStackAlloc( - GenTreeCall* newArr, CORINFO_CLASS_HANDLE clsHnd, GenTree* len, BasicBlock* block, Statement* stmt) +unsigned int ObjectAllocator::MorphNewArrNodeIntoStackAlloc(GenTreeCall* newArr, + CORINFO_CLASS_HANDLE clsHnd, + unsigned int length, + unsigned int blockSize, + BasicBlock* block, + Statement* stmt) { assert(newArr != nullptr); assert(m_AnalysisDone); assert(clsHnd != NO_CLASS_HANDLE); assert(newArr->IsHelperCall()); assert(newArr->GetHelperNum() != CORINFO_HELP_NEWARR_1_MAYBEFROZEN); - assert(len->IsCnsIntOrI()); - const unsigned length = (unsigned int)len->AsIntCon()->IconValue(); const bool shortLifetime = false; const bool alignTo8 = newArr->GetHelperNum() == CORINFO_HELP_NEWARR_1_ALIGN8; const unsigned int lclNum = m_compiler->lvaGrabTemp(shortLifetime DEBUGARG("stack allocated array temp")); From cf5063e688d1e59bbdb8e82b8c492d4091904037 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 8 May 2026 16:17:26 -0700 Subject: [PATCH 15/27] codegenwasm: honor GTF_LCLHEAP_MUSTINIT in genLclHeap The five other codegen backends (xarch, arm, arm64, loongarch64, riscv64) zero LCLHEAP allocations when either info.compInitMem is set or the GTF_LCLHEAP_MUSTINIT flag is present on the LCLHEAP node. The wasm backend was missing the flag check, so an LCLHEAP marked MUSTINIT (e.g. the runtime length stack-array path that flows through helperexpansion.cpp) would not be zeroed when compInitMem is false. Validated by building clr.wasmjit subset. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/jit/codegenwasm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index 0d89746cf97da6..25a47bc5f83d20 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ b/src/coreclr/jit/codegenwasm.cpp @@ -2884,7 +2884,7 @@ void CodeGen::genLclHeap(GenTree* tree) assert(m_compiler->compLocallocUsed); assert(isFramePointerUsed()); - bool const needsZeroing = m_compiler->info.compInitMem; + bool const needsZeroing = m_compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT); GenTree* const size = tree->AsOp()->gtOp1; // We reserve this amount of space below any allocation for From ad2add431c63f1f21fd8aea9f96aeb154b62e82e Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 8 May 2026 16:22:15 -0700 Subject: [PATCH 16/27] Add Compiler::gtMustZeroLocalloc helper and use it at LCLHEAP sites Introduce an inline helper on Compiler that returns true iff a given LCLHEAP node must zero its allocation, encapsulating the common 'info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT)' check used by every codegen and LSRA backend. Replace the 12 inlined occurrences across xarch, arm, arm64, loongarch64, riscv64, and wasm with calls to the helper. Validated by building clr.jit and clr.wasmjit subsets. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/jit/codegenarm.cpp | 2 +- src/coreclr/jit/codegenarm64.cpp | 2 +- src/coreclr/jit/codegenloongarch64.cpp | 2 +- src/coreclr/jit/codegenriscv64.cpp | 2 +- src/coreclr/jit/codegenwasm.cpp | 2 +- src/coreclr/jit/codegenxarch.cpp | 2 +- src/coreclr/jit/compiler.h | 9 +++++++++ src/coreclr/jit/lsraarm.cpp | 2 +- src/coreclr/jit/lsraarm64.cpp | 2 +- src/coreclr/jit/lsraloongarch64.cpp | 4 ++-- src/coreclr/jit/lsrariscv64.cpp | 4 ++-- src/coreclr/jit/lsraxarch.cpp | 2 +- 12 files changed, 22 insertions(+), 13 deletions(-) diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index 45739d568b7103..603960e349ef61 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -391,7 +391,7 @@ void CodeGen::genLclHeap(GenTree* tree) GenTree* size = tree->AsOp()->gtOp1; noway_assert((genActualType(size->gtType) == TYP_INT) || (genActualType(size->gtType) == TYP_I_IMPL)); - bool const initMem = m_compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT); + bool const initMem = m_compiler->gtMustZeroLocalloc(tree); // Result of localloc will be returned in regCnt. // Also it used as temporary register in code generation diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 5ae9fd0cb0713a..ad6983689f3ea3 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -3081,7 +3081,7 @@ void CodeGen::genLclHeap(GenTree* tree) noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes noway_assert(genStackLevel == 0); // Can't have anything on the stack - bool initMem = m_compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT); + bool initMem = m_compiler->gtMustZeroLocalloc(tree); // compute the amount of memory to allocate to properly STACK_ALIGN. 
size_t amount = 0; diff --git a/src/coreclr/jit/codegenloongarch64.cpp b/src/coreclr/jit/codegenloongarch64.cpp index 5fd2561a28d035..ca9c8b1de64e16 100644 --- a/src/coreclr/jit/codegenloongarch64.cpp +++ b/src/coreclr/jit/codegenloongarch64.cpp @@ -1461,7 +1461,7 @@ void CodeGen::genLclHeap(GenTree* tree) noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes noway_assert(genStackLevel == 0); // Can't have anything on the stack - bool const initMem = m_compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT); + bool const initMem = m_compiler->gtMustZeroLocalloc(tree); // compute the amount of memory to allocate to properly STACK_ALIGN. size_t amount = 0; diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index fc2b8a5b6746bb..b72b2b15a51bf3 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -1451,7 +1451,7 @@ void CodeGen::genLclHeap(GenTree* tree) noway_assert(isFramePointerUsed()); // localloc requires Frame Pointer to be established since SP changes noway_assert(genStackLevel == 0); // Can't have anything on the stack - bool const initMem = m_compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT); + bool const initMem = m_compiler->gtMustZeroLocalloc(tree); const target_size_t pageSize = m_compiler->eeGetPageSize(); // According to RISC-V Privileged ISA page size is 4KiB diff --git a/src/coreclr/jit/codegenwasm.cpp b/src/coreclr/jit/codegenwasm.cpp index 25a47bc5f83d20..e1f6a813e426d4 100644 --- a/src/coreclr/jit/codegenwasm.cpp +++ b/src/coreclr/jit/codegenwasm.cpp @@ -2884,7 +2884,7 @@ void CodeGen::genLclHeap(GenTree* tree) assert(m_compiler->compLocallocUsed); assert(isFramePointerUsed()); - bool const needsZeroing = m_compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT); + bool const needsZeroing = m_compiler->gtMustZeroLocalloc(tree); GenTree* const size = tree->AsOp()->gtOp1; // We 
reserve this amount of space below any allocation for diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 9e7c567190e2ae..1e5771621e7cba 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -2767,7 +2767,7 @@ void CodeGen::genLclHeap(GenTree* tree) target_size_t stackAdjustment = 0; target_size_t locAllocStackOffset = 0; - bool const initMem = m_compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT); + bool const initMem = m_compiler->gtMustZeroLocalloc(tree); // compute the amount of memory to allocate to properly STACK_ALIGN. size_t amount = 0; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 875106b746944b..6790224d4414e6 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3885,6 +3885,15 @@ class Compiler bool gtTreeHasLocalRead(GenTree* tree, unsigned lclNum); bool gtTreeHasLocalStore(GenTree* tree, unsigned lclNum); + // Returns true iff the LCLHEAP node "tree" must zero-initialize its + // allocation, either because the method requests init-mem semantics or + // because the node carries the GTF_LCLHEAP_MUSTINIT flag. + bool gtMustZeroLocalloc(GenTree* tree) + { + assert(tree->OperIs(GT_LCLHEAP)); + return info.compInitMem || ((tree->gtFlags & GTF_LCLHEAP_MUSTINIT) != 0); + } + void gtSetStmtInfo(Statement* stmt); // Returns "true" iff "node" has any of the side effects in "flags". diff --git a/src/coreclr/jit/lsraarm.cpp b/src/coreclr/jit/lsraarm.cpp index ea80c665d5139d..958c1a9ef7e3bf 100644 --- a/src/coreclr/jit/lsraarm.cpp +++ b/src/coreclr/jit/lsraarm.cpp @@ -68,7 +68,7 @@ int LinearScan::BuildLclHeap(GenTree* tree) { internalIntCount = 0; } - else if (!(m_compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT))) + else if (!m_compiler->gtMustZeroLocalloc(tree)) { // No need to initialize allocated stack space. 
if (sizeVal < m_compiler->eeGetPageSize()) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index a64eac0f298ef2..309f214077de42 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1186,7 +1186,7 @@ int LinearScan::BuildNode(GenTree* tree) else { srcCount = 1; - if (!(m_compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT))) + if (!m_compiler->gtMustZeroLocalloc(tree)) { buildInternalIntRegisterDefForNode(tree); buildInternalIntRegisterDefForNode(tree); diff --git a/src/coreclr/jit/lsraloongarch64.cpp b/src/coreclr/jit/lsraloongarch64.cpp index 644be392111bd1..394a768e20d889 100644 --- a/src/coreclr/jit/lsraloongarch64.cpp +++ b/src/coreclr/jit/lsraloongarch64.cpp @@ -452,7 +452,7 @@ int LinearScan::BuildNode(GenTree* tree) { // Need no internal registers } - else if (!(m_compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT))) + else if (!m_compiler->gtMustZeroLocalloc(tree)) { // No need to initialize allocated stack space. if (sizeVal < m_compiler->eeGetPageSize()) @@ -471,7 +471,7 @@ int LinearScan::BuildNode(GenTree* tree) else { srcCount = 1; - if (!(m_compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT))) + if (!m_compiler->gtMustZeroLocalloc(tree)) { buildInternalIntRegisterDefForNode(tree); buildInternalIntRegisterDefForNode(tree); diff --git a/src/coreclr/jit/lsrariscv64.cpp b/src/coreclr/jit/lsrariscv64.cpp index 9edbf8779a278b..7b94b12d101ef7 100644 --- a/src/coreclr/jit/lsrariscv64.cpp +++ b/src/coreclr/jit/lsrariscv64.cpp @@ -619,7 +619,7 @@ int LinearScan::BuildNode(GenTree* tree) { // Need no internal registers } - else if (!(m_compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT))) + else if (!m_compiler->gtMustZeroLocalloc(tree)) { // No need to initialize allocated stack space. 
if (sizeVal < m_compiler->eeGetPageSize()) @@ -640,7 +640,7 @@ int LinearScan::BuildNode(GenTree* tree) else { srcCount = 1; - if (!(m_compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT))) + if (!m_compiler->gtMustZeroLocalloc(tree)) { buildInternalIntRegisterDefForNode(tree); buildInternalIntRegisterDefForNode(tree); diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 918ec46889f73d..c289c71c8319d8 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1867,7 +1867,7 @@ int LinearScan::BuildLclHeap(GenTree* tree) } else { - if (!(m_compiler->info.compInitMem || (tree->gtFlags & GTF_LCLHEAP_MUSTINIT))) + if (!m_compiler->gtMustZeroLocalloc(tree)) { // For regCnt buildInternalIntRegisterDefForNode(tree); From 62f33f4111e4829a4ad29638e32c49a91c22acdb Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 8 May 2026 19:31:29 -0700 Subject: [PATCH 17/27] Remove unused locals after merge regression basicBlockHasBackwardJump and basicBlockInHandler in MorphAllocObjNodes are no longer used after the morph loop body was refactored into MorphAllocObjNodeHelperArr (which queries the block flags directly). MSVC tolerates this via /wd4189; clang/gcc on Linux/Mac warn. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/jit/objectalloc.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp index ed3adb0a829fbc..52c88bf0b7a578 100644 --- a/src/coreclr/jit/objectalloc.cpp +++ b/src/coreclr/jit/objectalloc.cpp @@ -1259,10 +1259,8 @@ bool ObjectAllocator::MorphAllocObjNodes() for (BasicBlock* const block : m_compiler->Blocks()) { - const bool basicBlockHasNewObj = block->HasFlag(BBF_HAS_NEWOBJ); - const bool basicBlockHasNewArr = block->HasFlag(BBF_HAS_NEWARR); - const bool basicBlockHasBackwardJump = block->HasFlag(BBF_BACKWARD_JUMP); - const bool basicBlockInHandler = block->hasHndIndex(); + const bool basicBlockHasNewObj = block->HasFlag(BBF_HAS_NEWOBJ); + const bool basicBlockHasNewArr = block->HasFlag(BBF_HAS_NEWARR); if (!basicBlockHasNewObj && !basicBlockHasNewArr) { From 47f47fe00edcd05c109973956fbbb1fd7db478cb Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 8 May 2026 19:32:00 -0700 Subject: [PATCH 18/27] Fix off-by-one in stack-array localloc size round-up The pointer-size round-up in fgExpandStackArrayAllocation was using (size + TARGET_POINTER_SIZE) & ~(TARGET_POINTER_SIZE - 1) which over-allocates by one pointer when the size is already aligned, and can push already-aligned sizes over the runtime stack threshold. The standard align-up formula is (size + TPS - 1) & ~(TPS - 1). 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/jit/helperexpansion.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 875d411ebaa779..b9c051026d2280 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -2944,9 +2944,9 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, if ((elemSizeValue % TARGET_POINTER_SIZE) != 0) { // Round size up to TARGET_POINTER_SIZE. - // size = (size + TPS) & ~(TPS-1) + // size = (size + TPS - 1) & ~(TPS - 1) // - GenTree* const roundSize = gtNewIconNode(TARGET_POINTER_SIZE, TYP_I_IMPL); + GenTree* const roundSize = gtNewIconNode(TARGET_POINTER_SIZE - 1, TYP_I_IMPL); GenTree* const biasedSize = gtNewOperNode(GT_ADD, TYP_I_IMPL, totalSize, roundSize); GenTree* const mask = gtNewIconNode(TARGET_POINTER_SIZE - 1, TYP_I_IMPL); GenTree* const invMask = gtNewOperNode(GT_NOT, TYP_I_IMPL, mask); From f1454bcf5a0a58975ec8b7decf5ffdd9c3279872 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 8 May 2026 19:32:59 -0700 Subject: [PATCH 19/27] Restore variable-length newarr admission for localloc dispatch Pre-merge HEAD admitted any 2-arg newarr helper as OAT_NEWARR and filtered constant-vs-variable inside the morph loop. Main's refactor extracted this into AllocationKind() and added an IsCnsIntOrI() gate as cleanup (a no-op at the time because no consumer existed for variable lengths). The merge inherited that gate, making the new m_UseLocalloc dispatch in MorphAllocObjNodeHelperArr unreachable. Relax the gate to also admit variable-length newarr when localloc is enabled, preserving main's behavior when localloc is off. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/jit/objectalloc.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp index 52c88bf0b7a578..3c43448ad95201 100644 --- a/src/coreclr/jit/objectalloc.cpp +++ b/src/coreclr/jit/objectalloc.cpp @@ -1224,9 +1224,13 @@ ObjectAllocator::ObjectAllocationType ObjectAllocator::AllocationKind(GenTree* t case CORINFO_HELP_NEWARR_1_DIRECT: case CORINFO_HELP_NEWARR_1_ALIGN8: { - if ((call->gtArgs.CountUserArgs() == 2) && call->gtArgs.GetUserArgByIndex(1)->GetNode()->IsCnsIntOrI()) + if (call->gtArgs.CountUserArgs() == 2) { - allocType = OAT_NEWARR; + GenTree* const lenArg = call->gtArgs.GetUserArgByIndex(1)->GetNode(); + if (lenArg->IsCnsIntOrI() || m_UseLocalloc) + { + allocType = OAT_NEWARR; + } } break; } From e3fcb6139c9a33497467be8ff45b9888c1aa38b2 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 8 May 2026 19:33:13 -0700 Subject: [PATCH 20/27] Keep localloc'd stack arrays only possibly-stack-pointing for GC tracking When MorphAllocObjNodeHelperArr dispatches a newarr to MorphNewArrNode- IntoLocAlloc, fgExpandStackArrayAllocation still emits a runtime heap fallback (the size check / heap helper path). The result local can therefore hold either a stack pointer or a real heap object reference, and must remain GC-reportable for the lifetime of the allocation. Previously MorphAllocObjNode unconditionally added the local to both m_PossiblyStackPointingPointers and m_DefinitelyStackPointingPointers, which causes it to be retyped to TYP_I_IMPL (a raw pointer the GC ignores). For the heap-fallback path that creates a GC hole. Plumb a m_definitelyStackPointing flag on AllocationCandidate (default true). Both localloc dispatch sites now clear it so the local stays in m_PossiblyStackPointingPointers only, retains TYP_BYREF, and is reported by the GC as an interior pointer. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/jit/objectalloc.cpp | 11 ++++++++++- src/coreclr/jit/objectalloc.h | 7 +++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp index 3c43448ad95201..52d2e08429d3a6 100644 --- a/src/coreclr/jit/objectalloc.cpp +++ b/src/coreclr/jit/objectalloc.cpp @@ -1319,8 +1319,11 @@ void ObjectAllocator::MorphAllocObjNode(AllocationCandidate& candidate) // We keep the set of possibly-stack-pointing pointers as a superset of the set of // definitely-stack-pointing pointers. All definitely-stack-pointing pointers are in both // sets. - MarkLclVarAsDefinitelyStackPointing(lclNum); MarkLclVarAsPossiblyStackPointing(lclNum); + if (candidate.m_definitelyStackPointing) + { + MarkLclVarAsDefinitelyStackPointing(lclNum); + } // If this was conditionally escaping enumerator, establish a connection between this local // and the enumeratorLocal we already allocated. This is needed because we do early rewriting @@ -1562,6 +1565,9 @@ bool ObjectAllocator::MorphAllocObjNodeHelperArr(AllocationCandidate& candidate) JITDUMP("Allocating V%02u on the stack [via localloc]\n", candidate.m_lclNum); MorphNewArrNodeIntoLocAlloc(data->AsCall(), clsHnd, len, candidate.m_block, candidate.m_statement); m_compiler->Metrics.LocallocAllocatedArrays++; + // helperexpansion may take the heap fallback at runtime, so the local is only + // possibly (not definitely) stack-pointing and must remain GC-reportable. 
+ candidate.m_definitelyStackPointing = false; return true; } @@ -1588,6 +1594,9 @@ bool ObjectAllocator::MorphAllocObjNodeHelperArr(AllocationCandidate& candidate) JITDUMP("Allocating V%02u on the stack [via localloc, in loop]\n", candidate.m_lclNum); MorphNewArrNodeIntoLocAlloc(data->AsCall(), clsHnd, len, candidate.m_block, candidate.m_statement); m_compiler->Metrics.LocallocAllocatedArrays++; + // helperexpansion may take the heap fallback at runtime, so the local is only + // possibly (not definitely) stack-pointing and must remain GC-reportable. + candidate.m_definitelyStackPointing = false; return true; } diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h index 38e9b9a3bd3d3a..4df2e12ca35bea 100644 --- a/src/coreclr/jit/objectalloc.h +++ b/src/coreclr/jit/objectalloc.h @@ -149,6 +149,7 @@ class ObjectAllocator final : public Phase , m_allocType(allocType) , m_onHeapReason(nullptr) , m_bashCall(false) + , m_definitelyStackPointing(true) { } @@ -159,6 +160,12 @@ class ObjectAllocator final : public Phase ObjectAllocationType const m_allocType; const char* m_onHeapReason; bool m_bashCall; + // True if a successful stack-allocation of this candidate yields a local that + // definitely points at stack memory. False when the morph leaves a runtime + // heap fallback in place (e.g. the localloc/heapalloc split for runtime-sized + // arrays); in that case the local is only possibly stack-pointing and must + // remain GC-reportable. + bool m_definitelyStackPointing; }; typedef SmallHashTable LocalToLocalMap; From 0c3e30323a172abc76df3d64b9c17308c8a663f9 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 8 May 2026 19:38:16 -0700 Subject: [PATCH 21/27] Validate length before stack-array localloc to avoid memory corruption The previous size guard compared the multiply-and-add result (elemSize * length + base) against the runtime stack-alloc limit, using a signed compare on TYP_INT against operands typed TYP_I_IMPL. 
For negative length, the I_IMPL multiply wraps to a hugely negative value; the addition wraps further; the signed compare can then pass and localloc allocates a tiny buffer for what the runtime believes is a huge array, corrupting the stack instead of throwing OverflowException. A length near INT32_MAX fails the same way through plain elemSize * length overflow. Replace the post-multiply check with a JIT-time-precomputed length-based unsigned compare: maxSafeLength = (stackLimit - base - align8Pad) / elemSize - 1 if elemSize is not pointer-aligned (round-up slack) if ((uint)length > maxSafeLength) goto heapallocBlock; This catches negative lengths (which look huge in unsigned space) and near-INT32_MAX overflows in a single compare. The intermediate totalSize stays where it is; even if it wraps before the guard, it is only consumed in locallocBlock, which we only enter when the length is provably safe. The heap fallback runs the original CORINFO_HELP_NEWARR_1_* helper, which raises OverflowException for negative lengths. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/jit/helperexpansion.cpp | 40 ++++++++++++++++++++++++----- 1 file changed, 34 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index b9c051026d2280..e60fb27b19f480 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -2976,13 +2976,41 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, gtUpdateStmtSideEffects(totalSizeStmt); fgInsertStmtBefore(block, stmt, totalSizeStmt); - // Check the length against our runtime threshold. For now we just check against - // the fixed length limit (528 bytes). + // Check the length against a JIT-time-precomputed safe upper bound, + // using an unsigned compare so that negative lengths (which signed + // would treat as "small") are routed to the heap-fallback helper. 
The + // helper validates length and raises OverflowException for negatives + // or when (length * elemSize) overflows. // - GenTree* const totalSizeForCheck = gtNewLclVarNode(totalSizeTemp); - GenTree* const runtimeSizeLimit = gtNewIconNode((unsigned)JitConfig.JitObjectStackAllocationSize(), TYP_I_IMPL); - GenTree* const runtimeSizeCompare = gtNewOperNode(GT_GT, TYP_INT, totalSizeForCheck, runtimeSizeLimit); - GenTree* const runtimeSizeCheck = gtNewOperNode(GT_JTRUE, TYP_VOID, runtimeSizeCompare); + // maxSafeLength is the largest length for which: + // base + payload (+ optional align8 pad) <= stackLimit + // and for which no intermediate I_IMPL multiply/add can wrap. + // + size_t const stackLimit = (size_t)(unsigned)JitConfig.JitObjectStackAllocationSize(); + size_t const baseBytes = (size_t)OFFSETOF__CORINFO_Array__data; +#ifndef TARGET_64BIT + size_t const align8Pad = isAlign8 ? 4 : 0; +#else + size_t const align8Pad = 0; +#endif + size_t maxSafeLength = 0; + if (stackLimit > baseBytes + align8Pad) + { + assert(elemSizeValue > 0); + maxSafeLength = (stackLimit - baseBytes - align8Pad) / elemSizeValue; + // The pointer-size round-up below can add up to (TPS - 1) bytes; + // trim one element to absorb that slack. 
+ if (((elemSizeValue % TARGET_POINTER_SIZE) != 0) && (maxSafeLength > 0)) + { + maxSafeLength--; + } + } + + GenTree* const lengthForCheck = gtCloneExpr(lengthArg); + GenTree* const lengthLimit = gtNewIconNode((ssize_t)maxSafeLength, TYP_INT); + GenTree* const runtimeSizeCompare = gtNewOperNode(GT_GT, TYP_INT, lengthForCheck, lengthLimit); + runtimeSizeCompare->gtFlags |= GTF_UNSIGNED; + GenTree* const runtimeSizeCheck = gtNewOperNode(GT_JTRUE, TYP_VOID, runtimeSizeCompare); Statement* const runtimeSizeCheckStmt = fgNewStmtFromTree(runtimeSizeCheck); gtUpdateStmtSideEffects(runtimeSizeCheckStmt); From 0b27c021b35216a1e09a435eba3112b186efdb4a Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 8 May 2026 19:41:38 -0700 Subject: [PATCH 22/27] Apply jit-format --fix --untidy Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/jit/helperexpansion.cpp | 4 ++-- src/coreclr/jit/objectalloc.cpp | 3 ++- src/coreclr/jit/objectalloc.h | 6 +++--- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index e60fb27b19f480..0cce252d2bacc9 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -3006,8 +3006,8 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, } } - GenTree* const lengthForCheck = gtCloneExpr(lengthArg); - GenTree* const lengthLimit = gtNewIconNode((ssize_t)maxSafeLength, TYP_INT); + GenTree* const lengthForCheck = gtCloneExpr(lengthArg); + GenTree* const lengthLimit = gtNewIconNode((ssize_t)maxSafeLength, TYP_INT); GenTree* const runtimeSizeCompare = gtNewOperNode(GT_GT, TYP_INT, lengthForCheck, lengthLimit); runtimeSizeCompare->gtFlags |= GTF_UNSIGNED; GenTree* const runtimeSizeCheck = gtNewOperNode(GT_JTRUE, TYP_VOID, runtimeSizeCompare); diff --git a/src/coreclr/jit/objectalloc.cpp b/src/coreclr/jit/objectalloc.cpp index 52d2e08429d3a6..6b99cdfa03e3b9 100644 --- 
a/src/coreclr/jit/objectalloc.cpp +++ b/src/coreclr/jit/objectalloc.cpp @@ -1855,7 +1855,8 @@ void ObjectAllocator::MorphNewArrNodeIntoLocAlloc( // operand for the stack local as an argument // GenTree* const elemSizeNode = m_compiler->gtNewIconNode(elemSize, TYP_I_IMPL); - newArr->gtArgs.PushBack(m_compiler, NewCallArg::Primitive(elemSizeNode).WellKnown(WellKnownArg::StackArrayElemSize)); + newArr->gtArgs.PushBack(m_compiler, + NewCallArg::Primitive(elemSizeNode).WellKnown(WellKnownArg::StackArrayElemSize)); newArr->gtCallMoreFlags |= GTF_CALL_M_STACK_ARRAY; // Retype the call result as a byref (we may decide to heap allocate at runtime). diff --git a/src/coreclr/jit/objectalloc.h b/src/coreclr/jit/objectalloc.h index 4df2e12ca35bea..cedc50d6ed2fbd 100644 --- a/src/coreclr/jit/objectalloc.h +++ b/src/coreclr/jit/objectalloc.h @@ -165,7 +165,7 @@ class ObjectAllocator final : public Phase // heap fallback in place (e.g. the localloc/heapalloc split for runtime-sized // arrays); in that case the local is only possibly stack-pointing and must // remain GC-reportable. 
- bool m_definitelyStackPointing; + bool m_definitelyStackPointing; }; typedef SmallHashTable LocalToLocalMap; @@ -268,8 +268,8 @@ class ObjectAllocator final : public Phase unsigned int blockSize, BasicBlock* block, Statement* stmt); - void MorphNewArrNodeIntoLocAlloc( - GenTreeCall* newArr, CORINFO_CLASS_HANDLE clsHnd, GenTree* length, BasicBlock* block, Statement* stmt); + void MorphNewArrNodeIntoLocAlloc( + GenTreeCall* newArr, CORINFO_CLASS_HANDLE clsHnd, GenTree* length, BasicBlock* block, Statement* stmt); struct BuildConnGraphVisitorCallbackData; void AnalyzeParentStack(ArrayStack* parentStack, unsigned int lclNum, BasicBlock* block); void UpdateAncestorTypes( From ebd2e516ebef13f0381e94405f67b85486b52a66 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 9 May 2026 08:45:26 -0700 Subject: [PATCH 23/27] Use length's actual type for stack-array localloc length check When the newarr helper's length argument is TYP_LONG (some helper variants), constructing GT_GT with TYP_INT op and a TYP_INT limit fires the codegen assert `genTypeSize(type) >= max(genTypeSize(op1Type), genTypeSize(op2Type))`. Use `genActualType(lengthArg)` for the icon and the comparison so the operands match. The unsigned compare semantics are preserved via GTF_UNSIGNED, and `maxSafeLength` is in-range for both INT and LONG. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/jit/helperexpansion.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index 0cce252d2bacc9..c82d18edb320a2 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -3006,9 +3006,10 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, } } - GenTree* const lengthForCheck = gtCloneExpr(lengthArg); - GenTree* const lengthLimit = gtNewIconNode((ssize_t)maxSafeLength, TYP_INT); - GenTree* const runtimeSizeCompare = gtNewOperNode(GT_GT, TYP_INT, lengthForCheck, lengthLimit); + GenTree* const lengthForCheck = gtCloneExpr(lengthArg); + var_types const lengthType = genActualType(lengthForCheck); + GenTree* const lengthLimit = gtNewIconNode((ssize_t)maxSafeLength, lengthType); + GenTree* const runtimeSizeCompare = gtNewOperNode(GT_GT, TYP_INT, lengthForCheck, lengthLimit); runtimeSizeCompare->gtFlags |= GTF_UNSIGNED; GenTree* const runtimeSizeCheck = gtNewOperNode(GT_JTRUE, TYP_VOID, runtimeSizeCompare); From 5dee83b9020052e9367b333ea24c5d08e64b4b6b Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 9 May 2026 08:45:41 -0700 Subject: [PATCH 24/27] Add regression tests for variable-length stack-array localloc Modeled on Delegates.cs: five [Fact] tests covering the localloc dispatch path for variable-length newarr. * TestSmall - non-escaping variable-length int[8], expects stack alloc. * TestLarge - variable-length int[10000], expects heap fallback. * TestNegative, TestIntMin - negative lengths must throw OverflowException through the heap helper (validates the JIT-time length guard). * TestHuge - long[int.MaxValue]: elem * length overflows; helper must throw without stack corruption. Verified the negative-length tests fail without the length guard and pass with it. 
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../LocallocStackAlloc.cs | 180 ++++++++++++++++++ .../LocallocStackAlloc.csproj | 15 ++ 2 files changed, 195 insertions(+) create mode 100644 src/tests/JIT/opt/ObjectStackAllocation/LocallocStackAlloc.cs create mode 100644 src/tests/JIT/opt/ObjectStackAllocation/LocallocStackAlloc.csproj diff --git a/src/tests/JIT/opt/ObjectStackAllocation/LocallocStackAlloc.cs b/src/tests/JIT/opt/ObjectStackAllocation/LocallocStackAlloc.cs new file mode 100644 index 00000000000000..161f7ac8a1d10d --- /dev/null +++ b/src/tests/JIT/opt/ObjectStackAllocation/LocallocStackAlloc.cs @@ -0,0 +1,180 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using Xunit; + +enum AllocationKind +{ + Heap, + Stack, + Undefined +} + +delegate int Test(); + +public class LocallocStackAlloc +{ + static bool GCStressEnabled() + { + return Environment.GetEnvironmentVariable("DOTNET_GCStress") != null; + } + + static AllocationKind StackAllocation() + { + AllocationKind expectedAllocationKind = AllocationKind.Stack; + if (GCStressEnabled()) + { + Console.WriteLine("GCStress is enabled"); + expectedAllocationKind = AllocationKind.Undefined; + } + return expectedAllocationKind; + } + + static AllocationKind HeapAllocation() + { + AllocationKind expectedAllocationKind = AllocationKind.Heap; + if (GCStressEnabled()) + { + Console.WriteLine("GCStress is enabled"); + expectedAllocationKind = AllocationKind.Undefined; + } + return expectedAllocationKind; + } + + static int CallTestAndVerifyAllocation(Test test, int expectedResult, AllocationKind expectedAllocationsKind, bool throws = false) + { + string methodName = test.Method.Name; + try + { + long allocatedBytesBefore = GC.GetAllocatedBytesForCurrentThread(); + int testResult = test(); + long allocatedBytesAfter = 
GC.GetAllocatedBytesForCurrentThread(); + + if (throws) + { + Console.WriteLine($"FAILURE ({methodName}): expected exception, got {testResult}"); + return -1; + } + + if (testResult != expectedResult) + { + Console.WriteLine($"FAILURE ({methodName}): expected {expectedResult}, got {testResult}"); + return -1; + } + + if ((expectedAllocationsKind == AllocationKind.Stack) && (allocatedBytesBefore != allocatedBytesAfter)) + { + Console.WriteLine($"FAILURE ({methodName}): unexpected allocation of {allocatedBytesAfter - allocatedBytesBefore} bytes"); + return -1; + } + + if ((expectedAllocationsKind == AllocationKind.Heap) && (allocatedBytesBefore == allocatedBytesAfter)) + { + Console.WriteLine($"FAILURE ({methodName}): unexpected stack allocation"); + return -1; + } + + Console.WriteLine($"SUCCESS ({methodName})"); + return 100; + } + catch (Exception e) + { + if (throws) + { + Console.WriteLine($"SUCCESS ({methodName}) caught {e.GetType().Name}"); + return 100; + } + Console.WriteLine($"FAILURE ({methodName}): unexpected {e.GetType().Name}: {e.Message}"); + return -1; + } + } + + // Keep JIT from constant-folding the length. + [MethodImpl(MethodImplOptions.NoInlining)] + static int OpaqueLength(int n) => n; + + // Variable-length stack-allocated int[] within the localloc threshold. + // Sums the elements after writing them. + [MethodImpl(MethodImplOptions.NoInlining)] + static int VariableLengthSmall() + { + int n = OpaqueLength(8); + int[] array = new int[n]; + int sum = 0; + for (int i = 0; i < array.Length; i++) + { + array[i] = i + 1; + } + for (int i = 0; i < array.Length; i++) + { + sum += array[i]; + } + return sum + array.Length; + } + + // Variable-length newarr that exceeds the stack-alloc threshold; should be + // routed to the heap helper at runtime instead of corrupting the stack. 
+ [MethodImpl(MethodImplOptions.NoInlining)] + static int VariableLengthLarge() + { + int n = OpaqueLength(10_000); + int[] array = new int[n]; + int sum = 0; + for (int i = 0; i < array.Length; i++) + { + array[i] = 1; + } + for (int i = 0; i < array.Length; i++) + { + sum += array[i]; + } + return sum; + } + + // Negative length must throw OverflowException via the heap helper + // even when the localloc dispatch path is selected. + [MethodImpl(MethodImplOptions.NoInlining)] + static int VariableLengthNegative() + { + int n = OpaqueLength(-1); + int[] array = new int[n]; + return array.Length; + } + + // int.MinValue length must also throw OverflowException; this is the case + // where signed totalSize wraps to a small value if not guarded properly. + [MethodImpl(MethodImplOptions.NoInlining)] + static int VariableLengthIntMin() + { + int n = OpaqueLength(int.MinValue); + int[] array = new int[n]; + return array.Length; + } + + // Length near INT32_MAX with large element size: elemSize * length overflows. + // Helper should raise OutOfMemoryException; no stack corruption. 
+ [MethodImpl(MethodImplOptions.NoInlining)] + static int VariableLengthHuge() + { + int n = OpaqueLength(int.MaxValue); + long[] array = new long[n]; + return array.Length; + } + + [Fact] + public static int TestSmall() => CallTestAndVerifyAllocation(VariableLengthSmall, 8 + (1 + 2 + 3 + 4 + 5 + 6 + 7 + 8), StackAllocation()); + + [Fact] + public static int TestLarge() => CallTestAndVerifyAllocation(VariableLengthLarge, 10_000, HeapAllocation()); + + [Fact] + public static int TestNegative() => CallTestAndVerifyAllocation(VariableLengthNegative, 0, AllocationKind.Undefined, throws: true); + + [Fact] + public static int TestIntMin() => CallTestAndVerifyAllocation(VariableLengthIntMin, 0, AllocationKind.Undefined, throws: true); + + [Fact] + public static int TestHuge() => CallTestAndVerifyAllocation(VariableLengthHuge, 0, AllocationKind.Undefined, throws: true); +} diff --git a/src/tests/JIT/opt/ObjectStackAllocation/LocallocStackAlloc.csproj b/src/tests/JIT/opt/ObjectStackAllocation/LocallocStackAlloc.csproj new file mode 100644 index 00000000000000..993c32962762b9 --- /dev/null +++ b/src/tests/JIT/opt/ObjectStackAllocation/LocallocStackAlloc.csproj @@ -0,0 +1,15 @@ + + + + true + None + True + true + + + + + + + + From 25128b6fd2db4756915eb4694246e34a3bf4cd19 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 9 May 2026 11:50:06 -0700 Subject: [PATCH 25/27] Skip localloc dispatch when newarr result is unused The localloc/heapalloc dispatch path assumes the original NEWARR call's result is consumed by a STORE_LCL_VAR; the heap-fallback block stores its call result into that same local. When the result is unused (e.g. DCE removed the consumer because escape analysis already retyped the call to byref and the destination became dead), `(*callUse)->gtNext` is null and the assert/dereference at the heap-store site AVs. Bail out of the dispatch when the call is not the data-source of a STORE_LCL_VAR root. 
The call is dead in that case and a later DCE pass will drop it. Caught 53 c0000005 failures across coreclr_tests.run.windows.x64.checked SPMI replay. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/jit/helperexpansion.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index c82d18edb320a2..d43dd40111c4af 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -2900,6 +2900,21 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, bool const isLocAlloc = (elemSizeArg != nullptr); bool const isAlign8 = isLocAlloc && (helper == CORINFO_HELP_NEWARR_1_ALIGN8); + // The localloc/heapalloc dispatch path needs to store the heap-fallback + // call result into the same local that consumes the original call's + // result. If the result is unused (e.g. DCE removed the consumer), + // skip the expansion and let later phases drop the dead call. + // + if (isLocAlloc) + { + GenTree* const stmtRoot = stmt->GetRootNode(); + if (!(stmtRoot->OperIs(GT_STORE_LCL_VAR) && (stmtRoot->AsLclVarCommon()->Data() == call))) + { + JITDUMP("Skipping localloc dispatch for [%06d]: call result is unused\n", dspTreeID(call)); + return false; + } + } + JITDUMP("Expanding new array helper for stack allocated array at [%06d] %sin " FMT_BB ":\n", dspTreeID(call), isLocAlloc ? " into localloc " : "", block->bbNum); DISPTREE(call); From 739a362a4146e5565a02fb965dc6ca1c2fce2eee Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Mon, 11 May 2026 10:21:46 -0700 Subject: [PATCH 26/27] Add per-frame budget for conditional stack allocations Each conditional newarr-localloc check now considers two limits: * the per-allocation size (existing JitObjectStackAllocationSize), and * a per-method-invocation cumulative budget on bytes localloc'd via these dispatches. 
Once the running total would exceed the frame budget, dispatch falls back to the heap helper instead of growing the stack frame without bound (e.g. in loops). * New release JIT config JitObjectStackAllocationFrameSize, defaulting to 8 * JitObjectStackAllocationSize (4224 bytes). * fgExpandStackArrayAllocations owns a lazily-created TYP_I_IMPL local that holds the per-invocation running total. The local is zero-initialized by an explicit store inserted at the top of fgFirstBB on first use, so this works independent of compInitMem. * The runtime check at each expansion combines two unsigned GTs (length>maxSafeLength | running+totalSize>frameLimit) and wraps the result in NE 0 for the JTRUE. * Adds a regression test that allocates inside a long loop and validates that heap fallback kicks in once the budget is exhausted. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/jit/compiler.h | 5 +- src/coreclr/jit/helperexpansion.cpp | 72 ++++++++++++++++--- src/coreclr/jit/jitconfigvalues.h | 4 ++ .../LocallocStackAlloc.cs | 28 ++++++++ 4 files changed, 100 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 6f4d839c66a0b1..024621ac21ef33 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6498,7 +6498,10 @@ class Compiler bool fgExpandStaticInitForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call); PhaseStatus fgExpandStackArrayAllocations(); - bool fgExpandStackArrayAllocation(BasicBlock* pBlock, Statement* stmt, GenTreeCall* call); + bool fgExpandStackArrayAllocation(BasicBlock* pBlock, + Statement* stmt, + GenTreeCall* call, + unsigned& frameRunningTotalLclNum); PhaseStatus fgVNBasedIntrinsicExpansion(); bool fgVNBasedIntrinsicExpansionForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call); diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index d43dd40111c4af..a693da516a0070 100644 --- 
a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -2801,6 +2801,12 @@ PhaseStatus Compiler::fgExpandStackArrayAllocations() // bool modified = false; + // Lazily-allocated TYP_I_IMPL local that accumulates the per-invocation + // total bytes of conditional (localloc) stack allocations. Initialized + // on first use by fgExpandStackArrayAllocation. + // + unsigned frameRunningTotalLclNum = BAD_VAR_NUM; + for (BasicBlock* const block : Blocks()) { for (Statement* const stmt : block->Statements()) @@ -2817,7 +2823,7 @@ PhaseStatus Compiler::fgExpandStackArrayAllocations() continue; } - if (fgExpandStackArrayAllocation(block, stmt, tree->AsCall())) + if (fgExpandStackArrayAllocation(block, stmt, tree->AsCall(), frameRunningTotalLclNum)) { // If we expand, we split the statement's tree // so will be done with this statment. @@ -2854,7 +2860,10 @@ PhaseStatus Compiler::fgExpandStackArrayAllocations() // For known sized arrays, we assume upstream analysis has limited size to // something reasonable, and the allocation is into fixed local storage. 
// -bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, GenTreeCall* call) +bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, + Statement* stmt, + GenTreeCall* call, + unsigned& frameRunningTotalLclNum) { if (!call->IsHelperCall()) { @@ -3021,12 +3030,48 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, } } - GenTree* const lengthForCheck = gtCloneExpr(lengthArg); - var_types const lengthType = genActualType(lengthForCheck); - GenTree* const lengthLimit = gtNewIconNode((ssize_t)maxSafeLength, lengthType); - GenTree* const runtimeSizeCompare = gtNewOperNode(GT_GT, TYP_INT, lengthForCheck, lengthLimit); - runtimeSizeCompare->gtFlags |= GTF_UNSIGNED; - GenTree* const runtimeSizeCheck = gtNewOperNode(GT_JTRUE, TYP_VOID, runtimeSizeCompare); + GenTree* const lengthForCheck = gtCloneExpr(lengthArg); + var_types const lengthType = genActualType(lengthForCheck); + GenTree* const lengthLimit = gtNewIconNode((ssize_t)maxSafeLength, lengthType); + GenTree* const lengthCompare = gtNewOperNode(GT_GT, TYP_INT, lengthForCheck, lengthLimit); + lengthCompare->gtFlags |= GTF_UNSIGNED; + + // Lazily allocate the per-frame running-total local, and insert an + // explicit zero-init store at the top of fgFirstBB. Independent of + // compInitMem and prolog zero-init policy. + // + if (frameRunningTotalLclNum == BAD_VAR_NUM) + { + frameRunningTotalLclNum = lvaGrabTemp(false DEBUGARG("stack alloc frame running total")); + lvaTable[frameRunningTotalLclNum].lvType = TYP_I_IMPL; + + GenTree* const zeroInit = gtNewStoreLclVarNode(frameRunningTotalLclNum, gtNewIconNode(0, TYP_I_IMPL)); + Statement* const zeroInitStmt = fgNewStmtFromTree(zeroInit); + fgInsertStmtAtBeg(fgFirstBB, zeroInitStmt); + + JITDUMP("Created stack alloc frame running total V%02u, zero-init at " FMT_BB "\n", frameRunningTotalLclNum, + fgFirstBB->bbNum); + } + + // Build the second check: running + totalSize > frameLimit (unsigned). 
+ // Note: when the length check fails the totalSize value computed in + // the temp is irrelevant; OR'ing the two compares preserves correct + // dispatch (the length check forces heap regardless of the second). + // + size_t const frameLimit = (size_t)(unsigned)JitConfig.JitObjectStackAllocationFrameSize(); + GenTree* const runningForCheck = gtNewLclVarNode(frameRunningTotalLclNum); + GenTree* const totalSizeForSum = gtNewLclVarNode(totalSizeTemp); + GenTree* const newRunningTotal = gtNewOperNode(GT_ADD, TYP_I_IMPL, runningForCheck, totalSizeForSum); + GenTree* const frameLimitNode = gtNewIconNode((ssize_t)frameLimit, TYP_I_IMPL); + GenTree* const frameCompare = gtNewOperNode(GT_GT, TYP_INT, newRunningTotal, frameLimitNode); + frameCompare->gtFlags |= GTF_UNSIGNED; + + // Combine the two compares. JTRUE requires a relop child, so wrap + // the OR with NE 0. + // + GenTree* const combinedOr = gtNewOperNode(GT_OR, TYP_INT, lengthCompare, frameCompare); + GenTree* const combinedCond = gtNewOperNode(GT_NE, TYP_INT, combinedOr, gtNewIconNode(0, TYP_INT)); + GenTree* const runtimeSizeCheck = gtNewOperNode(GT_JTRUE, TYP_VOID, combinedCond); Statement* const runtimeSizeCheckStmt = fgNewStmtFromTree(runtimeSizeCheck); gtUpdateStmtSideEffects(runtimeSizeCheckStmt); @@ -3103,6 +3148,17 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, Statement* stmt, gtUpdateStmtSideEffects(locallocStmt); fgInsertStmtBefore(locallocBlock, stmt, locallocStmt); + // Update the per-frame running total. Only the localloc path + // consumes frame space, so do it here and not on the heap path. 
+ // + GenTree* const runningOld = gtNewLclVarNode(frameRunningTotalLclNum); + GenTree* const totalSizeAdd = gtNewLclVarNode(totalSizeTemp); + GenTree* const runningSum = gtNewOperNode(GT_ADD, TYP_I_IMPL, runningOld, totalSizeAdd); + GenTree* const runningStore = gtNewStoreLclVarNode(frameRunningTotalLclNum, runningSum); + Statement* const runningStmt = fgNewStmtFromTree(runningStore); + gtUpdateStmtSideEffects(runningStmt); + fgInsertStmtBefore(locallocBlock, locallocStmt, runningStmt); + // Array address is the result of the localloc // stackLocalAddress = gtNewLclVarNode(locallocTemp); diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 3e253f341200bd..a6996c7d2549fd 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -703,6 +703,10 @@ CONFIG_STRING(JitObjectStackAllocationConditionalEscapeRange, "JitObjectStackAll RELEASE_CONFIG_INTEGER(JitObjectStackAllocationArray, "JitObjectStackAllocationArray", 1) RELEASE_CONFIG_INTEGER(JitObjectStackAllocationSize, "JitObjectStackAllocationSize", 528) RELEASE_CONFIG_INTEGER(JitObjectStackAllocationLocalloc, "JitObjectStackAllocationLocalloc", 1) +// Maximum cumulative bytes of conditional (localloc) stack allocations per method invocation. +// Once the running total would exceed this, further conditional allocations fall back to heap. +// Default is 8x JitObjectStackAllocationSize. 
+RELEASE_CONFIG_INTEGER(JitObjectStackAllocationFrameSize, "JitObjectStackAllocationFrameSize", 8 * 528) RELEASE_CONFIG_INTEGER(JitObjectStackAllocationInLoop, "JitObjectStackAllocationInLoop", 1) RELEASE_CONFIG_INTEGER(JitObjectStackAllocationTrackFields, "JitObjectStackAllocationTrackFields", 1) CONFIG_STRING(JitObjectStackAllocationTrackFieldsRange, "JitObjectStackAllocationTrackFieldsRange") diff --git a/src/tests/JIT/opt/ObjectStackAllocation/LocallocStackAlloc.cs b/src/tests/JIT/opt/ObjectStackAllocation/LocallocStackAlloc.cs index 161f7ac8a1d10d..db5714cb46ad7d 100644 --- a/src/tests/JIT/opt/ObjectStackAllocation/LocallocStackAlloc.cs +++ b/src/tests/JIT/opt/ObjectStackAllocation/LocallocStackAlloc.cs @@ -163,6 +163,30 @@ static int VariableLengthHuge() return array.Length; } + // Repeatedly allocate a small variable-length array within a single + // method invocation. The per-frame budget caps total localloc bytes, so + // after enough iterations the remaining allocations must fall back to + // the heap rather than growing the frame without bound. 
+ [MethodImpl(MethodImplOptions.NoInlining)] + static int VariableLengthFrameBudget() + { + int sum = 0; + for (int iter = 0; iter < 200; iter++) + { + int n = OpaqueLength(64); + int[] array = new int[n]; + for (int i = 0; i < array.Length; i++) + { + array[i] = i + 1; + } + for (int i = 0; i < array.Length; i++) + { + sum += array[i]; + } + } + return sum; + } + [Fact] public static int TestSmall() => CallTestAndVerifyAllocation(VariableLengthSmall, 8 + (1 + 2 + 3 + 4 + 5 + 6 + 7 + 8), StackAllocation()); @@ -177,4 +201,8 @@ static int VariableLengthHuge() [Fact] public static int TestHuge() => CallTestAndVerifyAllocation(VariableLengthHuge, 0, AllocationKind.Undefined, throws: true); + + [Fact] + public static int TestFrameBudget() => + CallTestAndVerifyAllocation(VariableLengthFrameBudget, 200 * ((64 * 65) / 2), HeapAllocation()); } From 1993d4133512b0eec1f42449cbc3cf26a709ed00 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Mon, 11 May 2026 11:07:24 -0700 Subject: [PATCH 27/27] Address PR #127980 review feedback * helperexpansion.cpp: in the localloc dispatch path, spill the length and method-table args to fresh temps once at the start of expansion, replacing the call's arg slots with temp uses. All downstream consumers (size compute, runtime size check, heap-fallback newCall, header init for length and method-table) now reference fresh gtNewLclVarNode uses of those temps. This removes: - the dependency on gtCloneExpr being able to clone a possibly side-effecting / non-clonable length expression, and - the transient single-parent violation where the heap-fallback newCall would share arg nodes with the original call. * LocallocStackAlloc.cs: add an unmeasured warm-up call for tests that assert pure stack allocation (no GC-heap bytes), so the first invocation's JIT-compile-time allocations don't contaminate the measurement. Mirrors the pattern used by Delegates.Test0/Test1. 
* LocallocStackAlloc.cs: annotate every [Fact] with [ActiveIssue("needs triage", TestRuntimes.Mono)] (and add `using TestLibrary;`) to match the other tests in this folder that are CoreCLR-JIT-specific. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/coreclr/jit/helperexpansion.cpp | 58 ++++++++++++++++--- .../LocallocStackAlloc.cs | 26 +++++++-- 2 files changed, 72 insertions(+), 12 deletions(-) diff --git a/src/coreclr/jit/helperexpansion.cpp b/src/coreclr/jit/helperexpansion.cpp index a693da516a0070..ded490dc4cc846 100644 --- a/src/coreclr/jit/helperexpansion.cpp +++ b/src/coreclr/jit/helperexpansion.cpp @@ -2942,8 +2942,15 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, } } - GenTree* const lengthArg = call->gtArgs.GetArgByIndex(lengthArgIndex)->GetNode(); - GenTree* stackLocalAddress = nullptr; + GenTree* lengthArg = call->gtArgs.GetArgByIndex(lengthArgIndex)->GetNode(); + GenTree* stackLocalAddress = nullptr; + + // Temps holding the once-evaluated length and method-table args for the + // localloc path. Used by both the dispatch path and the header init, + // so declared at function scope. + // + unsigned lengthTemp = BAD_VAR_NUM; + unsigned typeTemp = BAD_VAR_NUM; // If we have a localloc, compute (at runtime) overall size, and check length // against a threshold. If over, heap allocate. @@ -2955,10 +2962,45 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, GenTree* const elemSize = elemSizeArg->GetNode(); assert(elemSize->IsCnsIntOrI()); + // Spill the length and method-table args to fresh temps so all + // downstream consumers (size compute, runtime check, header + // init, heap-fallback call) reference a temp use instead of + // cloning the original (possibly side-effecting / non-clonable) + // expressions. Replace the call's arg slots with a temp use so + // the original expressions live in exactly one place. 
+ // + { + GenTree*& lengthArgRef = call->gtArgs.GetArgByIndex(lengthArgIndex)->NodeRef(); + GenTree* const origLength = lengthArgRef; + lengthTemp = lvaGrabTemp(true DEBUGARG("stack array length")); + lvaTable[lengthTemp].lvType = genActualType(origLength); + + GenTree* const lengthSpill = gtNewStoreLclVarNode(lengthTemp, origLength); + Statement* const lengthSpillStmt = fgNewStmtFromTree(lengthSpill); + gtUpdateStmtSideEffects(lengthSpillStmt); + fgInsertStmtBefore(block, stmt, lengthSpillStmt); + + lengthArgRef = gtNewLclVarNode(lengthTemp); + lengthArg = lengthArgRef; + } + { + GenTree*& typeArgRef = call->gtArgs.GetArgByIndex(typeArgIndex)->NodeRef(); + GenTree* const origType = typeArgRef; + typeTemp = lvaGrabTemp(true DEBUGARG("stack array method table")); + lvaTable[typeTemp].lvType = genActualType(origType); + + GenTree* const typeSpill = gtNewStoreLclVarNode(typeTemp, origType); + Statement* const typeSpillStmt = fgNewStmtFromTree(typeSpill); + gtUpdateStmtSideEffects(typeSpillStmt); + fgInsertStmtBefore(block, stmt, typeSpillStmt); + + typeArgRef = gtNewLclVarNode(typeTemp); + } + unsigned const locallocTemp = lvaGrabTemp(true DEBUGARG("localloc stack address")); lvaTable[locallocTemp].lvType = TYP_I_IMPL; - GenTree* const arrayLength = gtCloneExpr(lengthArg); + GenTree* const arrayLength = gtNewLclVarNode(lengthTemp); GenTree* const baseSize = gtNewIconNode(OFFSETOF__CORINFO_Array__data, TYP_I_IMPL); GenTree* const payloadSize = gtNewOperNode(GT_MUL, TYP_I_IMPL, elemSize, arrayLength); GenTree* totalSize = gtNewOperNode(GT_ADD, TYP_I_IMPL, baseSize, payloadSize); @@ -3030,7 +3072,7 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, } } - GenTree* const lengthForCheck = gtCloneExpr(lengthArg); + GenTree* const lengthForCheck = gtNewLclVarNode(lengthTemp); var_types const lengthType = genActualType(lengthForCheck); GenTree* const lengthLimit = gtNewIconNode((ssize_t)maxSafeLength, lengthType); GenTree* const lengthCompare = 
gtNewOperNode(GT_GT, TYP_INT, lengthForCheck, lengthLimit); @@ -3112,8 +3154,8 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, // GenTreeCall* newCall = gtNewCallNode(CT_HELPER, call->gtCallMethHnd, call->TypeGet()); - newCall->gtArgs.PushBack(this, NewCallArg::Primitive(call->gtArgs.GetArgByIndex(typeArgIndex)->GetNode())); - newCall->gtArgs.PushBack(this, NewCallArg::Primitive(call->gtArgs.GetArgByIndex(lengthArgIndex)->GetNode())); + newCall->gtArgs.PushBack(this, NewCallArg::Primitive(gtNewLclVarNode(typeTemp))); + newCall->gtArgs.PushBack(this, NewCallArg::Primitive(gtNewLclVarNode(lengthTemp))); newCall->gtFlags = call->gtFlags; #if defined(FEATURE_READYTORUN) newCall->setEntryPoint(call->gtEntryPoint); @@ -3201,7 +3243,7 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, // Initialize the array method table pointer. // GenTree* const mt = call->gtArgs.GetArgByIndex(typeArgIndex)->GetNode(); - GenTree* const mtToStore = isLocAlloc ? gtCloneExpr(mt) : mt; + GenTree* const mtToStore = isLocAlloc ? gtNewLclVarNode(typeTemp) : mt; GenTree* const mtStore = gtNewStoreValueNode(TYP_I_IMPL, stackLocalAddress, mtToStore); Statement* const mtStmt = fgNewStmtFromTree(mtStore); @@ -3209,7 +3251,7 @@ bool Compiler::fgExpandStackArrayAllocation(BasicBlock* block, // Initialize the array length. // - GenTree* const arrayLengthToStore = isLocAlloc ? gtCloneExpr(lengthArg) : lengthArg; + GenTree* const arrayLengthToStore = isLocAlloc ? 
gtNewLclVarNode(lengthTemp) : lengthArg; GenTree* const lengthArgInt = fgOptimizeCast(gtNewCastNode(TYP_INT, arrayLengthToStore, false, TYP_INT)); GenTree* const lengthAddress = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtCloneExpr(stackLocalAddress), gtNewIconNode(OFFSETOF__CORINFO_Array__length, TYP_I_IMPL)); diff --git a/src/tests/JIT/opt/ObjectStackAllocation/LocallocStackAlloc.cs b/src/tests/JIT/opt/ObjectStackAllocation/LocallocStackAlloc.cs index db5714cb46ad7d..f1a9a86973f1b1 100644 --- a/src/tests/JIT/opt/ObjectStackAllocation/LocallocStackAlloc.cs +++ b/src/tests/JIT/opt/ObjectStackAllocation/LocallocStackAlloc.cs @@ -3,6 +3,7 @@ using System; using System.Runtime.CompilerServices; +using TestLibrary; using Xunit; enum AllocationKind @@ -187,22 +188,39 @@ static int VariableLengthFrameBudget() return sum; } + [ActiveIssue("needs triage", TestRuntimes.Mono)] [Fact] - public static int TestSmall() => CallTestAndVerifyAllocation(VariableLengthSmall, 8 + (1 + 2 + 3 + 4 + 5 + 6 + 7 + 8), StackAllocation()); + public static int TestSmall() + { + VariableLengthSmall(); + return CallTestAndVerifyAllocation(VariableLengthSmall, 8 + (1 + 2 + 3 + 4 + 5 + 6 + 7 + 8), StackAllocation()); + } + [ActiveIssue("needs triage", TestRuntimes.Mono)] [Fact] - public static int TestLarge() => CallTestAndVerifyAllocation(VariableLengthLarge, 10_000, HeapAllocation()); + public static int TestLarge() + { + VariableLengthLarge(); + return CallTestAndVerifyAllocation(VariableLengthLarge, 10_000, HeapAllocation()); + } + [ActiveIssue("needs triage", TestRuntimes.Mono)] [Fact] public static int TestNegative() => CallTestAndVerifyAllocation(VariableLengthNegative, 0, AllocationKind.Undefined, throws: true); + [ActiveIssue("needs triage", TestRuntimes.Mono)] [Fact] public static int TestIntMin() => CallTestAndVerifyAllocation(VariableLengthIntMin, 0, AllocationKind.Undefined, throws: true); + [ActiveIssue("needs triage", TestRuntimes.Mono)] [Fact] public static int TestHuge() => 
CallTestAndVerifyAllocation(VariableLengthHuge, 0, AllocationKind.Undefined, throws: true); + [ActiveIssue("needs triage", TestRuntimes.Mono)] [Fact] - public static int TestFrameBudget() => - CallTestAndVerifyAllocation(VariableLengthFrameBudget, 200 * ((64 * 65) / 2), HeapAllocation()); + public static int TestFrameBudget() + { + VariableLengthFrameBudget(); + return CallTestAndVerifyAllocation(VariableLengthFrameBudget, 200 * ((64 * 65) / 2), HeapAllocation()); + } }