From 889424325931da53d84710adae699ab7cb371c90 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 15 Mar 2023 13:21:01 +0100 Subject: [PATCH 1/6] Re-order some blocks in runtimelookup --- src/coreclr/jit/runtimelookup.cpp | 39 ++++++++++++------------------- 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index 58e7d75ae4ba90..04a7a415855e56 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -239,12 +239,12 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // if (*fastPathValue == null) // goto fallbackBb; // - // fastPathBb(BBJ_ALWAYS): [weight: 0.8] - // rtLookupLcl = *fastPathValue; + // fallbackBb(BBJ_ALWAYS): [weight: 0.2] + // rtLookupLcl = HelperCall(); // goto block; // - // fallbackBb(BBJ_NONE): [weight: 0.2] - // rtLookupLcl = HelperCall(); + // fastPathBb(BBJ_NONE): [weight: 0.8] + // rtLookupLcl = *fastPathValue; // // block(...): [weight: 1.0] // use(rtLookupLcl); @@ -262,15 +262,14 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() CreateBlockFromTree(this, prevBb, BBJ_COND, gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp), debugInfo); + // Fast-path basic block + GenTree* asgFastpathValue = gtNewAssignNode(gtClone(rtLookupLcl), fastPathValueClone); + BasicBlock* fastPathBb = CreateBlockFromTree(this, nullcheckBb, BBJ_NONE, asgFastpathValue, debugInfo); + // Fallback basic block GenTree* asgFallbackValue = gtNewAssignNode(gtClone(rtLookupLcl), call); BasicBlock* fallbackBb = - CreateBlockFromTree(this, nullcheckBb, BBJ_NONE, asgFallbackValue, debugInfo, true); - - // Fast-path basic block - GenTree* asgFastpathValue = gtNewAssignNode(gtClone(rtLookupLcl), fastPathValueClone); - BasicBlock* fastPathBb = - CreateBlockFromTree(this, nullcheckBb, BBJ_ALWAYS, asgFastpathValue, debugInfo); + CreateBlockFromTree(this, nullcheckBb, BBJ_ALWAYS, asgFallbackValue, debugInfo, true); BasicBlock* sizeCheckBb = nullptr; if (needsSizeCheck) @@ -288,12 +287,12 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // if (*fastPathValue == null) // goto fallbackBb; // - // fastPathBb(BBJ_ALWAYS): [weight: 0.64] - // rtLookupLcl = *fastPathValue; + // fallbackBb(BBJ_ALWAYS): [weight: 0.36] + // rtLookupLcl = HelperCall(); // goto block; // - // fallbackBb(BBJ_NONE): [weight: 0.36] - // rtLookupLcl = HelperCall(); + // fastPathBb(BBJ_NONE): [weight: 0.64] + // rtLookupLcl = *fastPathValue; // // block(...): [weight: 1.0] // use(rtLookupLcl); @@ -322,8 +321,8 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() fgRemoveRefPred(block, prevBb); fgAddRefPred(block, fastPathBb); fgAddRefPred(block, fallbackBb); - nullcheckBb->bbJumpDest = fallbackBb; - fastPathBb->bbJumpDest = block; + nullcheckBb->bbJumpDest = fastPathBb; + fallbackBb->bbJumpDest = block; if (needsSizeCheck) { @@ -412,13 +411,5 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() } } - if (result == PhaseStatus::MODIFIED_EVERYTHING) - { - if (opts.OptimizationEnabled()) - { - fgReorderBlocks(/* useProfileData */ false); - fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_BASICS); - } - } return result; } From 65a2e0be7997c41cb318bdc08a3d94881cdb5f7e Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Wed, 15 Mar 2023 14:14:07 +0100 Subject: [PATCH 2/6] Update runtimelookup.cpp --- src/coreclr/jit/runtimelookup.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index 04a7a415855e56..97c68c7bbea4a8 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -256,7 +256,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // Save dictionary slot to a local (to be used by fast path) GenTree* fastPathValueClone = opts.OptimizationEnabled() ? fgMakeMultiUse(&fastPathValue) : gtCloneExpr(fastPathValue); - GenTree* nullcheckOp = gtNewOperNode(GT_EQ, TYP_INT, fastPathValue, gtNewIconNode(0, TYP_I_IMPL)); + GenTree* nullcheckOp = gtNewOperNode(GT_NE, TYP_INT, fastPathValue, gtNewIconNode(0, TYP_I_IMPL)); nullcheckOp->gtFlags |= GTF_RELOP_JMP_USED; BasicBlock* nullcheckBb = CreateBlockFromTree(this, prevBb, BBJ_COND, gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp), From 05293dc3903fe7ab46594d35b98f9e70b3ef1422 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Wed, 15 Mar 2023 14:27:00 +0100 Subject: [PATCH 3/6] Update runtimelookup.cpp --- src/coreclr/jit/runtimelookup.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index 97c68c7bbea4a8..a1f71a821d5200 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -236,8 +236,8 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // ... // // nullcheckBb(BBJ_COND): [weight: 1.0] - // if (*fastPathValue == null) - // goto fallbackBb; + // if (*fastPathValue != null) + // goto fastPathBb; // // fallbackBb(BBJ_ALWAYS): [weight: 0.2] // rtLookupLcl = HelperCall(); @@ -284,8 +284,8 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // ... // // nullcheckBb(BBJ_COND): [weight: 0.8] - // if (*fastPathValue == null) - // goto fallbackBb; + // if (*fastPathValue != null) + // goto fastPathBb; // // fallbackBb(BBJ_ALWAYS): [weight: 0.36] // rtLookupLcl = HelperCall(); From 9311a110d141f84208435743f25d9e79bd712a76 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 29 Mar 2023 02:13:36 +0200 Subject: [PATCH 4/6] mark blocks cold --- src/coreclr/jit/runtimelookup.cpp | 69 ++++++++++++++----------------- 1 file changed, 32 insertions(+), 37 deletions(-) diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index 8bedc452a5a4d2..2fc04974bfaeb5 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -224,7 +224,6 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() } GenTree* ctxTree = call->gtArgs.GetArgByIndex(0)->GetNode(); - GenTree* sigNode = call->gtArgs.GetArgByIndex(1)->GetNode(); // Prepare slotPtr tree (TODO: consider sharing this part with impRuntimeLookup) GenTree* slotPtrTree = gtCloneExpr(ctxTree); @@ -272,15 +271,15 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // ... // // nullcheckBb(BBJ_COND): [weight: 1.0] - // if (*fastPathValue != null) - // goto fastPathBb; + // if (*fastPathValue == null) + // goto fallbackBb; // - // fallbackBb(BBJ_ALWAYS): [weight: 0.2] - // rtLookupLcl = HelperCall(); + // fastPathBb(BBJ_ALWAYS): [weight: 1.0] + // rtLookupLcl = *fastPathValue; // goto block; // - // fastPathBb(BBJ_NONE): [weight: 0.8] - // rtLookupLcl = *fastPathValue; + // fallbackBb(BBJ_NONE): [weight: 0.0] + // rtLookupLcl = HelperCall(); // // block(...): [weight: 1.0] // use(rtLookupLcl); @@ -292,20 +291,21 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // Save dictionary slot to a local (to be used by fast path) GenTree* fastPathValueClone = opts.OptimizationEnabled() ? fgMakeMultiUse(&fastPathValue) : gtCloneExpr(fastPathValue); - GenTree* nullcheckOp = gtNewOperNode(GT_NE, TYP_INT, fastPathValue, gtNewIconNode(0, TYP_I_IMPL)); + GenTree* nullcheckOp = gtNewOperNode(GT_EQ, TYP_INT, fastPathValue, gtNewIconNode(0, TYP_I_IMPL)); nullcheckOp->gtFlags |= GTF_RELOP_JMP_USED; BasicBlock* nullcheckBb = CreateBlockFromTree(this, prevBb, BBJ_COND, gtNewOperNode(GT_JTRUE, TYP_VOID, nullcheckOp), debugInfo); - // Fast-path basic block - GenTree* asgFastpathValue = gtNewAssignNode(gtClone(rtLookupLcl), fastPathValueClone); - BasicBlock* fastPathBb = CreateBlockFromTree(this, nullcheckBb, BBJ_NONE, asgFastpathValue, debugInfo); - // Fallback basic block GenTree* asgFallbackValue = gtNewAssignNode(gtClone(rtLookupLcl), call); BasicBlock* fallbackBb = - CreateBlockFromTree(this, nullcheckBb, BBJ_ALWAYS, asgFallbackValue, debugInfo, true); + CreateBlockFromTree(this, nullcheckBb, BBJ_NONE, asgFallbackValue, debugInfo, true); + + // Fast-path basic block + GenTree* asgFastpathValue = gtNewAssignNode(gtClone(rtLookupLcl), fastPathValueClone); + BasicBlock* fastPathBb = + CreateBlockFromTree(this, nullcheckBb, BBJ_ALWAYS, asgFastpathValue, debugInfo); BasicBlock* sizeCheckBb = nullptr; if (needsSizeCheck) @@ -319,16 +319,16 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // goto fallbackBb; // ... // - // nullcheckBb(BBJ_COND): [weight: 0.8] - // if (*fastPathValue != null) - // goto fastPathBb; + // nullcheckBb(BBJ_COND): [weight: 1.0] + // if (*fastPathValue == null) + // goto fallbackBb; // - // fallbackBb(BBJ_ALWAYS): [weight: 0.36] - // rtLookupLcl = HelperCall(); + // fastPathBb(BBJ_ALWAYS): [weight: 1.0] + // rtLookupLcl = *fastPathValue; // goto block; // - // fastPathBb(BBJ_NONE): [weight: 0.64] - // rtLookupLcl = *fastPathValue; + // fallbackBb(BBJ_NONE): [weight: 0.0] + // rtLookupLcl = HelperCall(); // // block(...): [weight: 1.0] // use(rtLookupLcl); @@ -357,8 +357,8 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() fgRemoveRefPred(block, prevBb); fgAddRefPred(block, fastPathBb); fgAddRefPred(block, fallbackBb); - nullcheckBb->bbJumpDest = fastPathBb; - fallbackBb->bbJumpDest = block; + nullcheckBb->bbJumpDest = fallbackBb; + fastPathBb->bbJumpDest = block; if (needsSizeCheck) { @@ -386,27 +386,15 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() // // Re-distribute weights (see '[weight: X]' on the diagrams above) - // TODO: consider marking fallbackBb as rarely-taken // block->inheritWeight(prevBb); if (needsSizeCheck) { sizeCheckBb->inheritWeight(prevBb); - // 80% chance we pass nullcheck - nullcheckBb->inheritWeightPercentage(sizeCheckBb, 80); - // 64% (0.8 * 0.8) chance we pass both nullcheck and sizecheck - fastPathBb->inheritWeightPercentage(nullcheckBb, 80); - // 100-64=36% chance we fail either nullcheck or sizecheck - fallbackBb->inheritWeightPercentage(sizeCheckBb, 36); - } - else - { - nullcheckBb->inheritWeight(prevBb); - // 80% chance we pass nullcheck - fastPathBb->inheritWeightPercentage(nullcheckBb, 80); - // 20% chance we fail nullcheck (TODO: Consider making it cold (0%)) - fallbackBb->inheritWeightPercentage(nullcheckBb, 20); } + nullcheckBb->inheritWeight(prevBb); + fastPathBb->inheritWeight(prevBb); + fallbackBb->bbSetRunRarely(); // // Update loop info if loop table is known to be valid @@ -447,5 +435,12 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() } } + if (result == PhaseStatus::MODIFIED_EVERYTHING) + { + if (opts.OptimizationEnabled()) + { + fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_BASICS); + } + } return result; } From 73b458b2ea5c92c0a75bec90254b0ce126b489d4 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 29 Mar 2023 02:24:37 +0200 Subject: [PATCH 5/6] Restore fgReorderBlocks --- src/coreclr/jit/runtimelookup.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index 2fc04974bfaeb5..468872b8ba6e06 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -439,6 +439,7 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() { if (opts.OptimizationEnabled()) { + fgReorderBlocks(false); fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_BASICS); } } From 25056314b1a665c74e6ab77dbf4338615ef066fb Mon Sep 17 00:00:00 2001 From: EgorBo Date: Wed, 29 Mar 2023 02:33:36 +0200 Subject: [PATCH 6/6] actually we don't need them, let's rely on OptimizeLayout phase --- src/coreclr/jit/runtimelookup.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/runtimelookup.cpp b/src/coreclr/jit/runtimelookup.cpp index 468872b8ba6e06..e2209b54e1d4dd 100644 --- a/src/coreclr/jit/runtimelookup.cpp +++ b/src/coreclr/jit/runtimelookup.cpp @@ -424,6 +424,12 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() assert(BasicBlock::sameEHRegion(prevBb, sizeCheckBb)); } + // Merge prevBb with nullcheckBb (or sizeBb) if possible to simplify layout + if (fgCanCompactBlocks(prevBb, prevBb->GetUniqueSucc())) + { + fgCompactBlocks(prevBb, prevBb->GetUniqueSucc()); + } + // Scan current block again, the current call will be ignored because of ClearExpRuntimeLookup. // We don't try to re-use expansions for the same lookups in the current block here - CSE is responsible // for that @@ -434,14 +440,5 @@ PhaseStatus Compiler::fgExpandRuntimeLookups() } } } - - if (result == PhaseStatus::MODIFIED_EVERYTHING) - { - if (opts.OptimizationEnabled()) - { - fgReorderBlocks(false); - fgUpdateChangedFlowGraph(FlowGraphUpdates::COMPUTE_BASICS); - } - } return result; }