diff --git a/src/coreclr/src/jit/codegencommon.cpp b/src/coreclr/src/jit/codegencommon.cpp index 10129250671e9e..a89e24e217d51a 100644 --- a/src/coreclr/src/jit/codegencommon.cpp +++ b/src/coreclr/src/jit/codegencommon.cpp @@ -4596,8 +4596,9 @@ void CodeGen::genCheckUseBlockInit() // double-counting the initialization impact of any locals. bool counted = false; - if (varDsc->lvIsParam) + if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame) { + noway_assert(varDsc->lvRefCnt() == 0); continue; } @@ -4608,44 +4609,10 @@ void CodeGen::genCheckUseBlockInit() continue; } - // Likewise, initialization of the GS cookie is handled specially for OSR. - // Could do this for non-OSR too.. (likewise for the dummy) - if (compiler->opts.IsOSR() && varNum == compiler->lvaGSSecurityCookie) - { - continue; - } - - if (!varDsc->lvIsInReg() && !varDsc->lvOnFrame) - { - noway_assert(varDsc->lvRefCnt() == 0); - continue; - } - - if (varNum == compiler->lvaInlinedPInvokeFrameVar || varNum == compiler->lvaStubArgumentVar || - varNum == compiler->lvaRetAddrVar) - { - continue; - } - -#if FEATURE_FIXED_OUT_ARGS - if (varNum == compiler->lvaPInvokeFrameRegSaveVar) - { - continue; - } - if (varNum == compiler->lvaOutgoingArgSpaceVar) - { - continue; - } -#endif - -#if defined(FEATURE_EH_FUNCLETS) - // There's no need to force 0-initialization of the PSPSym, it will be - // initialized with a real value in the prolog - if (varNum == compiler->lvaPSPSym) + if (compiler->fgVarIsNeverZeroInitializedInProlog(varNum)) { continue; } -#endif if (compiler->lvaIsFieldOfDependentlyPromotedStruct(varDsc)) { @@ -4655,6 +4622,12 @@ void CodeGen::genCheckUseBlockInit() continue; } + if (varDsc->lvHasExplicitInit) + { + varDsc->lvMustInit = 0; + continue; + } + if (compiler->info.compInitMem || varDsc->HasGCPtr() || varDsc->lvMustInit) { if (varDsc->lvTracked) diff --git a/src/coreclr/src/jit/compiler.cpp b/src/coreclr/src/jit/compiler.cpp index 4779a6fa90c481..0a9003f51268c8 100644 --- a/src/coreclr/src/jit/compiler.cpp +++ b/src/coreclr/src/jit/compiler.cpp @@ -4690,6 +4690,8 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags DoPhase(this, PHASE_BUILD_SSA, &Compiler::fgSsaBuild); } + DoPhase(this, PHASE_ZERO_INITS, &Compiler::optRemoveRedundantZeroInits); + if (doEarlyProp) { // Propagate array length and rewrite getType() method call diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index e1521d63b2cd22..c6181d5e194a55 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -518,6 +518,10 @@ class LclVarDsc unsigned char lvSuppressedZeroInit : 1; // local needs zero init if we transform tail call to loop + unsigned char lvHasExplicitInit : 1; // The local is explicitly initialized and doesn't need zero initialization in + // the prolog. If the local has gc pointers, there are no gc-safe points + // between the prolog and the explicit initialization. + union { unsigned lvFieldLclStart; // The index of the local var representing the first field in the promoted struct // local. For implicit byref parameters, this gets hijacked between @@ -4653,8 +4657,11 @@ class Compiler unsigned fgSsaPassesCompleted; // Number of times fgSsaBuild has been run. + // Returns "true" if this is a special variable that is never zero initialized in the prolog. + inline bool fgVarIsNeverZeroInitializedInProlog(unsigned varNum); + // Returns "true" if the variable needs explicit zero initialization. - inline bool fgVarNeedsExplicitZeroInit(LclVarDsc* varDsc, bool bbInALoop, bool bbIsReturn); + inline bool fgVarNeedsExplicitZeroInit(unsigned varNum, bool bbInALoop, bool bbIsReturn); // The value numbers for this compilation. ValueNumStore* vnStore; @@ -5906,6 +5913,8 @@ class Compiler void optUnrollLoops(); // Unrolls loops (needs to have cost info) + void optRemoveRedundantZeroInits(); + protected: // This enumeration describes what is killed by a call. diff --git a/src/coreclr/src/jit/compiler.hpp b/src/coreclr/src/jit/compiler.hpp index 993c287aea54be..f3e590a7303a45 100644 --- a/src/coreclr/src/jit/compiler.hpp +++ b/src/coreclr/src/jit/compiler.hpp @@ -4145,11 +4145,39 @@ inline void Compiler::CLR_API_Leave(API_ICorJitInfo_Names ename) #endif // MEASURE_CLRAPI_CALLS +//------------------------------------------------------------------------------ +// fgVarIsNeverZeroInitializedInProlog : Check whether the variable is never zero initialized in the prolog. +// +// Arguments: +// varNum - local variable number +// +// Returns: +// true if this is a special variable that is never zero initialized in the prolog; +// false otherwise +// + +bool Compiler::fgVarIsNeverZeroInitializedInProlog(unsigned varNum) +{ + LclVarDsc* varDsc = lvaGetDesc(varNum); + bool result = varDsc->lvIsParam || lvaIsOSRLocal(varNum) || (opts.IsOSR() && (varNum == lvaGSSecurityCookie)) || + (varNum == lvaInlinedPInvokeFrameVar) || (varNum == lvaStubArgumentVar) || (varNum == lvaRetAddrVar); + +#if FEATURE_FIXED_OUT_ARGS + result = result || (varNum == lvaPInvokeFrameRegSaveVar) || (varNum == lvaOutgoingArgSpaceVar); +#endif + +#if defined(FEATURE_EH_FUNCLETS) + result = result || (varNum == lvaPSPSym); +#endif + + return result; +} + //------------------------------------------------------------------------------ // fgVarNeedsExplicitZeroInit : Check whether the variable needs an explicit zero initialization. // // Arguments: -// varDsc - local var description +// varNum - local var number // bbInALoop - true if the basic block may be in a loop // bbIsReturn - true if the basic block always returns // @@ -4165,13 +4193,20 @@ inline void Compiler::CLR_API_Leave(API_ICorJitInfo_Names ename) // - compInitMem is set and the variable has a long lifetime or has gc fields. // In these cases we will insert zero-initialization in the prolog if necessary. -bool Compiler::fgVarNeedsExplicitZeroInit(LclVarDsc* varDsc, bool bbInALoop, bool bbIsReturn) +bool Compiler::fgVarNeedsExplicitZeroInit(unsigned varNum, bool bbInALoop, bool bbIsReturn) { + LclVarDsc* varDsc = lvaGetDesc(varNum); + if (bbInALoop && !bbIsReturn) { return true; } + if (fgVarIsNeverZeroInitializedInProlog(varNum)) + { + return true; + } + if (varTypeIsGC(varDsc->lvType)) { return false; diff --git a/src/coreclr/src/jit/compmemkind.h b/src/coreclr/src/jit/compmemkind.h index 1b409544ba23bf..586e5f3ac3ca78 100644 --- a/src/coreclr/src/jit/compmemkind.h +++ b/src/coreclr/src/jit/compmemkind.h @@ -58,6 +58,7 @@ CompMemKindMacro(VariableLiveRanges) CompMemKindMacro(ClassLayout) CompMemKindMacro(TailMergeThrows) CompMemKindMacro(EarlyProp) +CompMemKindMacro(ZeroInit) //clang-format on #undef CompMemKindMacro diff --git a/src/coreclr/src/jit/compphases.h b/src/coreclr/src/jit/compphases.h index 0cafdc1793e3ca..ccda79c4cfe794 100644 --- a/src/coreclr/src/jit/compphases.h +++ b/src/coreclr/src/jit/compphases.h @@ -55,6 +55,7 @@ CompPhaseNameMacro(PHASE_CREATE_FUNCLETS, "Create EH funclets", CompPhaseNameMacro(PHASE_MERGE_THROWS, "Merge throw blocks", "MRGTHROW", false, -1, false) CompPhaseNameMacro(PHASE_OPTIMIZE_LAYOUT, "Optimize layout", "LAYOUT", false, -1, false) CompPhaseNameMacro(PHASE_COMPUTE_REACHABILITY, "Compute blocks reachability", "BL_REACH", false, -1, false) +CompPhaseNameMacro(PHASE_ZERO_INITS, "Redundant zero Inits", "ZERO-INIT", false, -1, false) CompPhaseNameMacro(PHASE_OPTIMIZE_LOOPS, "Optimize loops", "LOOP-OPT", false, -1, false) CompPhaseNameMacro(PHASE_CLONE_LOOPS, "Clone loops", "LP-CLONE", false, -1, false) CompPhaseNameMacro(PHASE_UNROLL_LOOPS, "Unroll loops", "UNROLL", false, -1, false) diff --git a/src/coreclr/src/jit/flowgraph.cpp b/src/coreclr/src/jit/flowgraph.cpp index 617367eef73e90..8f668426490379 100644 --- a/src/coreclr/src/jit/flowgraph.cpp +++ b/src/coreclr/src/jit/flowgraph.cpp @@ -23787,7 +23787,7 @@ Statement* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) if (tmpNum != BAD_VAR_NUM) { LclVarDsc* const tmpDsc = lvaGetDesc(tmpNum); - if (!fgVarNeedsExplicitZeroInit(tmpDsc, bbInALoop, bbIsReturn)) + if (!fgVarNeedsExplicitZeroInit(tmpNum, bbInALoop, bbIsReturn)) { JITDUMP("\nSuppressing zero-init for V%02u -- expect to zero in prolog\n", tmpNum); tmpDsc->lvSuppressedZeroInit = 1; diff --git a/src/coreclr/src/jit/importer.cpp b/src/coreclr/src/jit/importer.cpp index 06522b71930069..d5bf9bf745541d 100644 --- a/src/coreclr/src/jit/importer.cpp +++ b/src/coreclr/src/jit/importer.cpp @@ -13883,7 +13883,7 @@ void Compiler::impImportBlockCode(BasicBlock* block) bool bbIsReturn = (block->bbJumpKind == BBJ_RETURN) && (!compIsForInlining() || (impInlineInfo->iciBlock->bbJumpKind == BBJ_RETURN)); LclVarDsc* const lclDsc = lvaGetDesc(lclNum); - if (fgVarNeedsExplicitZeroInit(lclDsc, bbInALoop, bbIsReturn)) + if (fgVarNeedsExplicitZeroInit(lclNum, bbInALoop, bbIsReturn)) { // Append a tree to zero-out the temp newObjThisPtr = gtNewLclvNode(lclNum, lvaTable[lclNum].TypeGet()); diff --git a/src/coreclr/src/jit/objectalloc.cpp b/src/coreclr/src/jit/objectalloc.cpp index 4c34b4d36806bc..7be52493cd21e6 100644 --- a/src/coreclr/src/jit/objectalloc.cpp +++ b/src/coreclr/src/jit/objectalloc.cpp @@ -521,7 +521,7 @@ unsigned int ObjectAllocator::MorphAllocObjNodeIntoStackAlloc(GenTreeAllocObj* a bool bbInALoop = (block->bbFlags & BBF_BACKWARD_JUMP) != 0; bool bbIsReturn = block->bbJumpKind == BBJ_RETURN; LclVarDsc* const lclDsc = comp->lvaGetDesc(lclNum); - if (comp->fgVarNeedsExplicitZeroInit(lclDsc, bbInALoop, bbIsReturn)) + if (comp->fgVarNeedsExplicitZeroInit(lclNum, bbInALoop, bbIsReturn)) { //------------------------------------------------------------------------ // STMTx (IL 0x... ???) diff --git a/src/coreclr/src/jit/optimizer.cpp b/src/coreclr/src/jit/optimizer.cpp index 096898ba00997c..2fe04e7a8b0606 100644 --- a/src/coreclr/src/jit/optimizer.cpp +++ b/src/coreclr/src/jit/optimizer.cpp @@ -9189,3 +9189,147 @@ void Compiler::optOptimizeBools() fgDebugCheckBBlist(); #endif } + +typedef JitHashTable, unsigned> LclVarRefCounts; + +//------------------------------------------------------------------------------------------ +// optRemoveRedundantZeroInits: Remove redundant zero intializations. +// +// Notes: +// This phase iterates over basic blocks starting with the first basic block until there is no unique +// basic block successor or until it detects a loop. It keeps track of local nodes it encounters. +// When it gets to an assignment to a local variable or a local field, it checks whether the assignment +// is the first reference to the local (or to the parent of the local field), and, if so, +// it may do one of two optimizations: +// 1. If the following conditions are true: +// the local is untracked, +// the rhs of the assignment is 0, +// the local is guaranteed to be fully initialized in the prolog, +// then the explicit zero initialization is removed. +// 2. If the following conditions are true: +// the assignment is to a local (and not a field), +// the local is not lvLiveInOutOfHndlr or no exceptions can be thrown between the prolog and the assignment, +// either the local has no gc pointers or there are no gc-safe points between the prolog and the assignment, +// then the local with lvHasExplicitInit which tells the codegen not to insert zero initialization for this +// local in the prolog. + +void Compiler::optRemoveRedundantZeroInits() +{ +#ifdef DEBUG + if (verbose) + { + printf("*************** In optRemoveRedundantZeroInits()\n"); + } +#endif // DEBUG + + CompAllocator allocator(getAllocator(CMK_ZeroInit)); + LclVarRefCounts refCounts(allocator); + bool hasGCSafePoint = false; + bool canThrow = false; + + assert(fgStmtListThreaded); + + for (BasicBlock* block = fgFirstBB; (block != nullptr) && ((block->bbFlags & BBF_MARKED) == 0); + block = block->GetUniqueSucc()) + { + block->bbFlags |= BBF_MARKED; + for (Statement* stmt = block->FirstNonPhiDef(); stmt != nullptr;) + { + Statement* next = stmt->GetNextStmt(); + for (GenTree* tree = stmt->GetTreeList(); tree != nullptr; tree = tree->gtNext) + { + if (((tree->gtFlags & GTF_CALL) != 0) && (!tree->IsCall() || !tree->AsCall()->IsSuppressGCTransition())) + { + hasGCSafePoint = true; + } + + if ((tree->gtFlags & GTF_EXCEPT) != 0) + { + canThrow = true; + } + + switch (tree->gtOper) + { + case GT_LCL_VAR: + case GT_LCL_FLD: + case GT_LCL_VAR_ADDR: + case GT_LCL_FLD_ADDR: + { + unsigned lclNum = tree->AsLclVarCommon()->GetLclNum(); + unsigned* pRefCount = refCounts.LookupPointer(lclNum); + if (pRefCount != nullptr) + { + *pRefCount = (*pRefCount) + 1; + } + else + { + refCounts.Set(lclNum, 1); + } + + break; + } + case GT_ASG: + { + GenTreeOp* treeOp = tree->AsOp(); + if (treeOp->gtOp1->OperIs(GT_LCL_VAR, GT_LCL_FLD)) + { + unsigned lclNum = treeOp->gtOp1->AsLclVarCommon()->GetLclNum(); + LclVarDsc* const lclDsc = lvaGetDesc(lclNum); + unsigned* pRefCount = refCounts.LookupPointer(lclNum); + assert(pRefCount != nullptr); + if (*pRefCount == 1) + { + // The local hasn't been referenced before this assignment. + bool removedExplicitZeroInit = false; + if (!lclDsc->lvTracked && treeOp->gtOp2->IsIntegralConst(0)) + { + bool bbInALoop = (block->bbFlags & BBF_BACKWARD_JUMP) != 0; + bool bbIsReturn = block->bbJumpKind == BBJ_RETURN; + + if (!fgVarNeedsExplicitZeroInit(lclNum, bbInALoop, bbIsReturn)) + { + // We are guaranteed to have a zero initialization in the prolog and + // the local hasn't been redefined between the prolog and this explicit + // zero initialization so the assignment can be safely removed. + if (tree == stmt->GetRootNode()) + { + fgRemoveStmt(block, stmt); + removedExplicitZeroInit = true; + *pRefCount = 0; + lclDsc->lvSuppressedZeroInit = 1; + } + } + } + + if (!removedExplicitZeroInit && treeOp->gtOp1->OperIs(GT_LCL_VAR) && + (!canThrow || !lclDsc->lvLiveInOutOfHndlr)) + { + // If compMethodRequiresPInvokeFrame() returns true, lower may later + // insert a call to CORINFO_HELP_INIT_PINVOKE_FRAME which is a gc-safe point. + if (!lclDsc->HasGCPtr() || + (!GetInterruptible() && !hasGCSafePoint && !compMethodRequiresPInvokeFrame())) + { + // The local hasn't been used and won't be reported to the gc between + // the prolog and this explicit intialization. Therefore, it doesn't + // require zero initialization in the prolog. + lclDsc->lvHasExplicitInit = 1; + } + } + } + } + break; + } + default: + break; + } + } + stmt = next; + } + } + + for (BasicBlock* block = fgFirstBB; (block != nullptr) && ((block->bbFlags & BBF_MARKED) != 0); + block = block->GetUniqueSucc()) + { + block->bbFlags &= ~BBF_MARKED; + } +}