From d711b0f1970574a83857e1f00fb5d67008c40403 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 10 Jul 2021 13:31:30 +0300 Subject: [PATCH 01/17] Improve Inliner, vol.3 --- src/coreclr/jit/fgbasic.cpp | 14 ++++++- src/coreclr/jit/importer.cpp | 2 - src/coreclr/jit/inline.def | 1 + src/coreclr/jit/inlinepolicy.cpp | 40 ++++++++++++++++--- src/coreclr/jit/inlinepolicy.h | 4 ++ src/coreclr/jit/jitconfigvalues.h | 2 +- .../Linq/Expressions/DebugViewWriter.cs | 2 + 7 files changed, 55 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 62a9467da63fe9..a40d6a20bf3d26 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -1347,13 +1347,19 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed handled = true; compInlineResult->Note(InlineObservation::CALLSITE_FOLDABLE_EXPR); } - if ((FgStack::IsConstant(arg0) && FgStack::IsConstant(arg1)) || - (FgStack::IsConstant(arg1) && FgStack::IsConstant(arg0))) + else if ((FgStack::IsConstant(arg0) && FgStack::IsConstant(arg1)) || + (FgStack::IsConstant(arg1) && FgStack::IsConstant(arg0))) { // both are constants, but we're mostly interested in cases where a const arg leads to // a foldable expression. handled = true; } + else if ((FgStack::IsArgument(arg0) == FgStack::IsArgument(arg1)) && (arg0 == arg1)) + { + // Both args are the same + handled = true; + compInlineResult->Note(InlineObservation::CALLSITE_FOLDABLE_EXPR); + } else if (FgStack::IsArgument(arg0) && FgStack::IsConstantOrConstArg(arg1, impInlineInfo)) { // "Arg op CNS" --> keep arg0 in the stack for the next ops @@ -1583,6 +1589,10 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed if (makeInlineObservations) { compInlineResult->Note(InlineObservation::CALLEE_HAS_SWITCH); + if (FgStack::IsConstantOrConstArg(pushedStack.Top(), impInlineInfo)) + { + compInlineResult->Note(InlineObservation::CALLSITE_FOLDABLE_SWITCH); + } // Fail fast, if we're inlining and can't handle this. if (isInlining && compInlineResult->IsFailure()) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index d49a12ffd13dd0..c2d26a7d9e57c3 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -13258,8 +13258,6 @@ void Compiler::impImportBlockCode(BasicBlock* block) goto COND_JUMP; case CEE_SWITCH: - assert(!compIsForInlining()); - if (tiVerificationNeeded) { Verify(impStackTop().seTypeInfo.IsType(TI_INT), "Bad switch val"); diff --git a/src/coreclr/jit/inline.def b/src/coreclr/jit/inline.def index a16d82888b64a3..cbd85ff240dea7 100644 --- a/src/coreclr/jit/inline.def +++ b/src/coreclr/jit/inline.def @@ -182,6 +182,7 @@ INLINE_OBSERVATION(FOLDABLE_INTRINSIC, int, "foldable intrinsic", INLINE_OBSERVATION(FOLDABLE_EXPR, int, "foldable binary expression", INFORMATION, CALLSITE) INLINE_OBSERVATION(FOLDABLE_EXPR_UN, int, "foldable unary expression", INFORMATION, CALLSITE) INLINE_OBSERVATION(FOLDABLE_BRANCH, int, "foldable branch", INFORMATION, CALLSITE) +INLINE_OBSERVATION(FOLDABLE_SWITCH, int, "foldable switch", INFORMATION, CALLSITE) INLINE_OBSERVATION(DIV_BY_CNS, int, "dividy by const", INFORMATION, CALLSITE) INLINE_OBSERVATION(CONSTANT_ARG_FEEDS_TEST, bool, "constant argument feeds test", INFORMATION, CALLSITE) INLINE_OBSERVATION(DEPTH, int, "depth", INFORMATION, CALLSITE) diff --git a/src/coreclr/jit/inlinepolicy.cpp b/src/coreclr/jit/inlinepolicy.cpp index fdf4ae19341c2f..8202eb01e71180 100644 --- a/src/coreclr/jit/inlinepolicy.cpp +++ b/src/coreclr/jit/inlinepolicy.cpp @@ -326,7 +326,6 @@ void DefaultPolicy::NoteBool(InlineObservation obs, bool value) m_ArgFeedsRangeCheck++; break; - case InlineObservation::CALLEE_HAS_SWITCH: case InlineObservation::CALLEE_UNSUPPORTED_OPCODE: propagate = true; break; @@ -1294,6 +1293,14 @@ void ExtendedDefaultPolicy::NoteBool(InlineObservation obs, bool value) m_FoldableBranch++; break; + case InlineObservation::CALLSITE_FOLDABLE_SWITCH: + m_FoldableSwitch++; + break; + + case InlineObservation::CALLEE_HAS_SWITCH: + m_Switch++; + break; + case InlineObservation::CALLSITE_DIV_BY_CNS: m_DivByCns++; break; @@ -1364,9 +1371,9 @@ void ExtendedDefaultPolicy::NoteInt(InlineObservation obs, int value) { // We're not able to recognize arg-specific foldable branches // in prejit-root mode. - bbLimit += 3; + bbLimit += 5 + m_Switch * 10; } - bbLimit += m_FoldableBranch; + bbLimit += m_FoldableBranch * 2 + m_ThrowBlock * 2 + m_FoldableSwitch * 10; if ((unsigned)value > bbLimit) { SetNever(InlineObservation::CALLEE_TOO_MANY_BASIC_BLOCKS); @@ -1457,7 +1464,7 @@ double ExtendedDefaultPolicy::DetermineMultiplier() if (m_NonGenericCallsGeneric) { - multiplier += 1.5; + multiplier += 2.0; JITDUMP("\nInline candidate is generic and caller is not. Multiplier increased to %g.", multiplier); } @@ -1507,7 +1514,7 @@ double ExtendedDefaultPolicy::DetermineMultiplier() if (m_Intrinsic > 0) { // In most cases such intrinsics are lowered as single CPU instructions - multiplier += 1.5; + multiplier += 1.0 + m_Intrinsic * 0.2; JITDUMP("\nInline has %d intrinsics. Multiplier increased to %g.", m_Intrinsic, multiplier); } @@ -1636,6 +1643,27 @@ double ExtendedDefaultPolicy::DetermineMultiplier() break; } + if (m_FoldableSwitch > 0) + { + multiplier += m_FoldableSwitch * 5.0; + JITDUMP("\nInline candidate has %d foldable switches. Multiplier increased to %g.", m_FoldableSwitch, multiplier); + } + else if (m_Switch > 0) + { + if (m_IsPrejitRoot) + { + // Assume the switches can be foldable in PrejitRoot mode. + multiplier += m_Switch * 5.0; + JITDUMP("\nPrejit root candidate has %d switches. Multiplier increased to %g.", m_Switch, multiplier); + } + else + { + // TODO: Investigate cases where it makes sense to inline non-foldable switches + multiplier = 0; + JITDUMP("\nInline candidate has %d switches. Multiplier limited to %g.", m_Switch, multiplier); + } + } + if (m_HasProfile) { // There are cases when Profile Data can be misleading or polluted: @@ -1738,6 +1766,8 @@ void ExtendedDefaultPolicy::OnDumpXml(FILE* file, unsigned indent) const XATTR_I4(m_FoldableExpr) XATTR_I4(m_FoldableExprUn) XATTR_I4(m_FoldableBranch) + XATTR_I4(m_FoldableSwitch) + XATTR_I4(m_Switch) XATTR_I4(m_DivByCns) XATTR_B(m_ReturnsStructByValue) XATTR_B(m_IsFromValueClass) diff --git a/src/coreclr/jit/inlinepolicy.h b/src/coreclr/jit/inlinepolicy.h index 7d0d83bd736461..466e17fe0e1127 100644 --- a/src/coreclr/jit/inlinepolicy.h +++ b/src/coreclr/jit/inlinepolicy.h @@ -204,6 +204,8 @@ class ExtendedDefaultPolicy : public DefaultPolicy , m_FoldableExpr(0) , m_FoldableExprUn(0) , m_FoldableBranch(0) + , m_FoldableSwitch(0) + , m_Switch(0) , m_DivByCns(0) , m_ReturnsStructByValue(false) , m_IsFromValueClass(false) @@ -252,6 +254,8 @@ class ExtendedDefaultPolicy : public DefaultPolicy unsigned m_FoldableExpr; unsigned m_FoldableExprUn; unsigned m_FoldableBranch; + unsigned m_FoldableSwitch; + unsigned m_Switch; unsigned m_DivByCns; bool m_ReturnsStructByValue : 1; bool m_IsFromValueClass : 1; diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 7254e205fbc48b..53117d07593a6b 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -460,7 +460,7 @@ CONFIG_STRING(JitInlineReplayFile, W("JitInlineReplayFile")) // Extended version of DefaultPolicy that includes a more precise IL scan, // relies on PGO if it exists and generally is more aggressive. CONFIG_INTEGER(JitExtDefaultPolicy, W("JitExtDefaultPolicy"), 1) -CONFIG_INTEGER(JitExtDefaultPolicyMaxIL, W("JitExtDefaultPolicyMaxIL"), 0x64) +CONFIG_INTEGER(JitExtDefaultPolicyMaxIL, W("JitExtDefaultPolicyMaxIL"), 0x400) CONFIG_INTEGER(JitExtDefaultPolicyMaxBB, W("JitExtDefaultPolicyMaxBB"), 7) // Inliner uses the following formula for PGO-driven decisions: diff --git a/src/libraries/System.Linq.Expressions/src/System/Linq/Expressions/DebugViewWriter.cs b/src/libraries/System.Linq.Expressions/src/System/Linq/Expressions/DebugViewWriter.cs index af967733a5a7ab..2f2ecf4b384355 100644 --- a/src/libraries/System.Linq.Expressions/src/System/Linq/Expressions/DebugViewWriter.cs +++ b/src/libraries/System.Linq.Expressions/src/System/Linq/Expressions/DebugViewWriter.cs @@ -7,6 +7,7 @@ using System.Globalization; using System.IO; using System.Reflection; +using System.Runtime.CompilerServices; namespace System.Linq.Expressions { @@ -168,6 +169,7 @@ private void Out(string s, Flow after) Out(Flow.None, s, after); } + [MethodImpl(MethodImplOptions.NoInlining)] private void Out(Flow before, string s, Flow after) { switch (GetFlow(before)) From 154bd99b17abd5b4c23c4a7fff07aa62f559c788 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sat, 10 Jul 2021 15:43:46 +0300 Subject: [PATCH 02/17] Improvements --- src/coreclr/jit/importer.cpp | 2 +- src/coreclr/jit/inlinepolicy.cpp | 19 +++++++++++-------- src/coreclr/jit/jitconfigvalues.h | 3 ++- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index c2d26a7d9e57c3..e4943f5eb42c2b 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -19080,7 +19080,7 @@ void Compiler::impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, I assert(callSiteWeight >= 0); assert(entryWeight >= 0); - BasicBlock::weight_t sufficientSamples = 1000.0f; + BasicBlock::weight_t sufficientSamples = 5000.0f; if (!rootCompiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) || ((callSiteWeight + entryWeight) > sufficientSamples)) diff --git a/src/coreclr/jit/inlinepolicy.cpp b/src/coreclr/jit/inlinepolicy.cpp index 8202eb01e71180..553d1571a12b57 100644 --- a/src/coreclr/jit/inlinepolicy.cpp +++ b/src/coreclr/jit/inlinepolicy.cpp @@ -1373,7 +1373,7 @@ void ExtendedDefaultPolicy::NoteInt(InlineObservation obs, int value) // in prejit-root mode. bbLimit += 5 + m_Switch * 10; } - bbLimit += m_FoldableBranch * 2 + m_ThrowBlock * 2 + m_FoldableSwitch * 10; + bbLimit += m_FoldableBranch * 2 + m_FoldableSwitch * 10; if ((unsigned)value > bbLimit) { SetNever(InlineObservation::CALLEE_TOO_MANY_BASIC_BLOCKS); @@ -1426,13 +1426,13 @@ double ExtendedDefaultPolicy::DetermineMultiplier() if (m_ReturnsStructByValue) { // For structs-passed-by-value we might avoid expensive copy operations if we inline. - multiplier += 1.5; + multiplier += 2.0; JITDUMP("\nInline candidate returns a struct by value. Multiplier increased to %g.", multiplier); } else if (m_ArgIsStructByValue > 0) { // Same here - multiplier += 1.5; + multiplier += 2.0; JITDUMP("\n%d arguments are structs passed by value. Multiplier increased to %g.", m_ArgIsStructByValue, multiplier); } @@ -1464,7 +1464,7 @@ double ExtendedDefaultPolicy::DetermineMultiplier() if (m_NonGenericCallsGeneric) { - multiplier += 2.0; + multiplier += 1.5; JITDUMP("\nInline candidate is generic and caller is not. Multiplier increased to %g.", multiplier); } @@ -1688,11 +1688,14 @@ double ExtendedDefaultPolicy::DetermineMultiplier() JITDUMP("\nCallsite has profile data: %g.", m_ProfileFrequency); } - if (m_RootCompiler->lvaTableCnt > ((unsigned)(JitConfig.JitMaxLocalsToTrack() / 4))) + if (JitConfig.JitExtDefaultPolicyMaxLclStep() > 0) { - // Slow down inlining if we already have to many locals in the rootCompiler. - multiplier /= ((double)m_RootCompiler->lvaTableCnt / ((double)JitConfig.JitMaxLocalsToTrack() / 4.0)); - JITDUMP("\nCaller %d locals. Multiplier decreased to %g.", m_RootCompiler->lvaTableCnt, multiplier); + const double lclLimitStep = JitConfig.JitMaxLocalsToTrack() / (double)JitConfig.JitExtDefaultPolicyMaxLclStep(); + if (m_RootCompiler->lvaTableCnt > lclLimitStep) + { + multiplier /= (m_RootCompiler->lvaTableCnt / lclLimitStep); + JITDUMP("\nCaller has %d locals. Multiplier decreased to %g.", m_RootCompiler->lvaTableCnt, multiplier); + } } if (m_BackwardJump) diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 53117d07593a6b..914e617c5345a9 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -460,8 +460,9 @@ CONFIG_STRING(JitInlineReplayFile, W("JitInlineReplayFile")) // Extended version of DefaultPolicy that includes a more precise IL scan, // relies on PGO if it exists and generally is more aggressive. CONFIG_INTEGER(JitExtDefaultPolicy, W("JitExtDefaultPolicy"), 1) +CONFIG_INTEGER(JitExtDefaultPolicyMaxLclStep, W("JitExtDefaultPolicyMaxLclStep"), 0xA) CONFIG_INTEGER(JitExtDefaultPolicyMaxIL, W("JitExtDefaultPolicyMaxIL"), 0x400) -CONFIG_INTEGER(JitExtDefaultPolicyMaxBB, W("JitExtDefaultPolicyMaxBB"), 7) +CONFIG_INTEGER(JitExtDefaultPolicyMaxBB, W("JitExtDefaultPolicyMaxBB"), 0xA) // Inliner uses the following formula for PGO-driven decisions: // From 6dd92ca3ae7b1636206ee27afc1dcc32e1e52f34 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Sun, 11 Jul 2021 21:00:25 +0300 Subject: [PATCH 03/17] Tuning. --- src/coreclr/jit/compiler.cpp | 3 ++ src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/fgbasic.cpp | 62 +++++++++++++++++++++---------- src/coreclr/jit/importer.cpp | 17 ++++++--- src/coreclr/jit/inline.h | 1 + src/coreclr/jit/inlinepolicy.cpp | 61 +++++++++++++----------------- src/coreclr/jit/jitconfigvalues.h | 7 ++-- 7 files changed, 89 insertions(+), 64 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 7019d8afad6c5d..ea01b1d30b1ee3 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -6266,6 +6266,9 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr, // a potential inline candidate. InlineResult prejitResult(this, methodHnd, "prejit"); + // Profile data allows us to avoid early "too many IL bytes" outs. + prejitResult.NoteBool(InlineObservation::CALLSITE_HAS_PROFILE, fgHaveProfileData()); + // Do the initial inline screen. impCanInlineIL(methodHnd, methodInfo, forceInline, &prejitResult); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 1383b57a7dd3a2..180150d5658a6b 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5777,7 +5777,6 @@ class Compiler #endif } - bool fgHaveProfileData(); bool fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::weight_t* weight); Instrumentor* fgCountInstrumentor; @@ -5814,6 +5813,7 @@ class Compiler void WalkSpanningTree(SpanningTreeVisitor* visitor); void fgSetProfileWeight(BasicBlock* block, BasicBlock::weight_t weight); void fgApplyProfileScale(); + bool fgHaveProfileData(); // fgIsUsingProfileWeights - returns true if we have real profile data for this method // or if we have some fake profile data for the stress mode diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index a40d6a20bf3d26..eee45e0564ba04 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -818,8 +818,24 @@ class FgStack return false; } const unsigned argNum = value - SLOT_ARGUMENT; - assert(argNum < info->argCnt); - return info->inlArgInfo[argNum].argIsInvariant; + if (argNum < info->argCnt) + { + return info->inlArgInfo[argNum].argIsInvariant; + } + return false; + } + static bool IsExactArgument(FgSlot value, InlineInfo* info) + { + if ((info == nullptr) || !IsArgument(value)) + { + return false; + } + const unsigned argNum = value - SLOT_ARGUMENT; + if (argNum < info->argCnt) + { + return info->inlArgInfo[argNum].argIsExact; + } + return false; } static unsigned SlotTypeToArgNum(FgSlot value) { @@ -867,16 +883,15 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed const bool isForceInline = (info.compFlags & CORINFO_FLG_FORCEINLINE) != 0; const bool makeInlineObservations = (compInlineResult != nullptr); const bool isInlining = compIsForInlining(); - const bool isPreJit = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT); - const bool isTier1 = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1); unsigned retBlocks = 0; int prefixFlags = 0; bool preciseScan = makeInlineObservations && compInlineResult->GetPolicy()->RequiresPreciseScan(); - const bool resolveTokens = preciseScan && (isPreJit || isTier1); + const bool resolveTokens = preciseScan; if (makeInlineObservations) { // Observe force inline state and code size. + compInlineResult->NoteBool(InlineObservation::CALLSITE_HAS_PROFILE, fgHaveProfileData()); compInlineResult->NoteBool(InlineObservation::CALLEE_IS_FORCE_INLINE, isForceInline); compInlineResult->NoteInt(InlineObservation::CALLEE_IL_CODE_SIZE, codeSize); @@ -1031,7 +1046,8 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed if (makeInlineObservations) { FgStack::FgSlot slot = pushedStack.Top(); - if (FgStack::IsConstantOrConstArg(slot, impInlineInfo)) + if (FgStack::IsConstantOrConstArg(slot, impInlineInfo) || + FgStack::IsExactArgument(slot, impInlineInfo)) { compInlineResult->Note(InlineObservation::CALLSITE_FOLDABLE_EXPR_UN); handled = true; // and keep argument in the pushedStack @@ -1338,34 +1354,42 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed FgStack::FgSlot arg0 = pushedStack.Top(1); FgStack::FgSlot arg1 = pushedStack.Top(0); - if ((FgStack::IsConstant(arg0) && FgStack::IsConstArgument(arg1, impInlineInfo)) || - (FgStack::IsConstant(arg1) && FgStack::IsConstArgument(arg0, impInlineInfo)) || - (FgStack::IsConstArgument(arg0, impInlineInfo) && - FgStack::IsConstArgument(arg1, impInlineInfo))) + // Const op ConstArg -> ConstArg + if (FgStack::IsConstant(arg0) && FgStack::IsConstArgument(arg1, impInlineInfo)) { // keep stack unchanged handled = true; compInlineResult->Note(InlineObservation::CALLSITE_FOLDABLE_EXPR); } - else if ((FgStack::IsConstant(arg0) && FgStack::IsConstant(arg1)) || - (FgStack::IsConstant(arg1) && FgStack::IsConstant(arg0))) + // ConstArg op Const -> ConstArg + // ConstArg op ConstArg -> ConstArg + else if (FgStack::IsConstArgument(arg0, impInlineInfo) && + FgStack::IsConstantOrConstArg(arg1, impInlineInfo)) { - // both are constants, but we're mostly interested in cases where a const arg leads to - // a foldable expression. + if (FgStack::IsConstant(arg1)) + { + pushedStack.Push(arg0); + } handled = true; + compInlineResult->Note(InlineObservation::CALLSITE_FOLDABLE_EXPR); } - else if ((FgStack::IsArgument(arg0) == FgStack::IsArgument(arg1)) && (arg0 == arg1)) + // Const op Const -> Const + else if (FgStack::IsConstant(arg0) && FgStack::IsConstant(arg1)) { - // Both args are the same + // both are constants, but we're mostly interested in cases where a const arg leads to + // a foldable expression. handled = true; - compInlineResult->Note(InlineObservation::CALLSITE_FOLDABLE_EXPR); } + // Arg op ConstArg + // Arg op Const else if (FgStack::IsArgument(arg0) && FgStack::IsConstantOrConstArg(arg1, impInlineInfo)) { // "Arg op CNS" --> keep arg0 in the stack for the next ops handled = true; compInlineResult->Note(InlineObservation::CALLEE_BINARY_EXRP_WITH_CNS); } + // ConstArg op Arg + // Const op Arg else if (FgStack::IsArgument(arg1) && FgStack::IsConstantOrConstArg(arg0, impInlineInfo)) { // "CNS op ARG" --> keep arg1 in the stack for the next ops @@ -1373,10 +1397,10 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed handled = true; compInlineResult->Note(InlineObservation::CALLEE_BINARY_EXRP_WITH_CNS); } - + // X / ConstArg + // X % ConstArg if (FgStack::IsConstArgument(arg1, impInlineInfo)) { - // Special case: "X / ConstArg" or "X % ConstArg" if ((opcode == CEE_DIV) || (opcode == CEE_DIV_UN) || (opcode == CEE_REM) || (opcode == CEE_REM_UN)) { diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index e4943f5eb42c2b..f23f963ed0b42f 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -19073,16 +19073,16 @@ void Compiler::impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, I // if ((pInlineInfo != nullptr) && rootCompiler->fgHaveProfileData() && pInlineInfo->iciBlock->hasProfileWeight()) { - BasicBlock::weight_t callSiteWeight = pInlineInfo->iciBlock->bbWeight; - BasicBlock::weight_t entryWeight = rootCompiler->fgFirstBB->bbWeight; - BasicBlock::weight_t profileFreq = entryWeight == 0.0f ? 0.0f : callSiteWeight / entryWeight; + const BasicBlock::weight_t callSiteWeight = pInlineInfo->iciBlock->bbWeight; + const BasicBlock::weight_t entryWeight = rootCompiler->fgFirstBB->bbWeight; + const BasicBlock::weight_t profileFreq = entryWeight == 0.0f ? 0.0f : callSiteWeight / entryWeight; assert(callSiteWeight >= 0); assert(entryWeight >= 0); - BasicBlock::weight_t sufficientSamples = 5000.0f; + const BasicBlock::weight_t sufficientSamples = 1000.0f; - if (!rootCompiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) || + if ((rootCompiler->fgPgoSource != ICorJitInfo::PgoSource::Static) || ((callSiteWeight + entryWeight) > sufficientSamples)) { // Let's not report profiles for methods with insufficient samples during prejitting. @@ -19241,6 +19241,9 @@ void Compiler::impCheckCanInline(GenTreeCall* call, goto _exit; } + // Profile data allows us to avoid early "too many IL bytes" outs. + pParam->result->NoteBool(InlineObservation::CALLSITE_HAS_PROFILE, pParam->pThis->fgHaveProfileData()); + bool forceInline; forceInline = !!(pParam->methAttr & CORINFO_FLG_FORCEINLINE); @@ -19463,6 +19466,10 @@ void Compiler::impInlineRecordArgInfo(InlineInfo* pInlineInfo, } } + bool isExact = false; + bool isNonNull = false; + inlCurArgInfo->argIsExact = (gtGetClassHandle(curArgVal, &isExact, &isNonNull) != NO_CLASS_HANDLE) && isExact; + // If the arg is a local that is address-taken, we can't safely // directly substitute it into the inlinee. // diff --git a/src/coreclr/jit/inline.h b/src/coreclr/jit/inline.h index bc08dd8110d2a8..6a39b2d9cb5d01 100644 --- a/src/coreclr/jit/inline.h +++ b/src/coreclr/jit/inline.h @@ -610,6 +610,7 @@ struct InlArgInfo unsigned argHasStargOp : 1; // Is there STARG(s) operation on this argument? unsigned argIsByRefToStructLocal : 1; // Is this arg an address of a struct local or a normed struct local or a // field in them? + unsigned argIsExact : 1; // Is this arg of an exact class? }; // InlLclVarInfo describes inline candidate argument and local variable properties. diff --git a/src/coreclr/jit/inlinepolicy.cpp b/src/coreclr/jit/inlinepolicy.cpp index 553d1571a12b57..926f0e26adb9f6 100644 --- a/src/coreclr/jit/inlinepolicy.cpp +++ b/src/coreclr/jit/inlinepolicy.cpp @@ -1334,7 +1334,13 @@ void ExtendedDefaultPolicy::NoteInt(InlineObservation obs, int value) { assert(m_IsForceInlineKnown); assert(value != 0); - m_CodeSize = static_cast(value); + m_CodeSize = static_cast(value); + unsigned maxCodeSize = static_cast(JitConfig.JitExtDefaultPolicyMaxIL()); + + if (m_HasProfile) + { + maxCodeSize = static_cast(JitConfig.JitExtDefaultPolicyMaxILProf()); + } if (m_IsForceInline) { @@ -1346,7 +1352,7 @@ void ExtendedDefaultPolicy::NoteInt(InlineObservation obs, int value) // Candidate based on small size SetCandidate(InlineObservation::CALLEE_BELOW_ALWAYS_INLINE_SIZE); } - else if (m_CodeSize <= (unsigned)JitConfig.JitExtDefaultPolicyMaxIL()) + else if (m_CodeSize <= maxCodeSize) { // Candidate, pending profitability evaluation SetCandidate(InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE); @@ -1364,21 +1370,6 @@ void ExtendedDefaultPolicy::NoteInt(InlineObservation obs, int value) { SetNever(InlineObservation::CALLEE_DOES_NOT_RETURN); } - else if (!m_IsForceInline) - { - unsigned bbLimit = (unsigned)JitConfig.JitExtDefaultPolicyMaxBB(); - if (m_IsPrejitRoot) - { - // We're not able to recognize arg-specific foldable branches - // in prejit-root mode. - bbLimit += 5 + m_Switch * 10; - } - bbLimit += m_FoldableBranch * 2 + m_FoldableSwitch * 10; - if ((unsigned)value > bbLimit) - { - SetNever(InlineObservation::CALLEE_TOO_MANY_BASIC_BLOCKS); - } - } break; } default: @@ -1426,13 +1417,13 @@ double ExtendedDefaultPolicy::DetermineMultiplier() if (m_ReturnsStructByValue) { // For structs-passed-by-value we might avoid expensive copy operations if we inline. - multiplier += 2.0; + multiplier += 1.5; JITDUMP("\nInline candidate returns a struct by value. Multiplier increased to %g.", multiplier); } else if (m_ArgIsStructByValue > 0) { // Same here - multiplier += 2.0; + multiplier += 1.5; JITDUMP("\n%d arguments are structs passed by value. Multiplier increased to %g.", m_ArgIsStructByValue, multiplier); } @@ -1514,7 +1505,7 @@ double ExtendedDefaultPolicy::DetermineMultiplier() if (m_Intrinsic > 0) { // In most cases such intrinsics are lowered as single CPU instructions - multiplier += 1.0 + m_Intrinsic * 0.2; + multiplier += 1.0 + m_Intrinsic * 0.3; JITDUMP("\nInline has %d intrinsics. Multiplier increased to %g.", m_Intrinsic, multiplier); } @@ -1541,7 +1532,7 @@ double ExtendedDefaultPolicy::DetermineMultiplier() // // int Caller(string s) => Callee(s); // String is 'exact' (sealed) // - multiplier += 2.5; + multiplier += 2.0; JITDUMP("\nCallsite passes %d arguments of exact classes while callee accepts non-exact ones. Multiplier " "increased to %g.", m_ArgIsExactClsSigIsNot, multiplier); @@ -1561,7 +1552,7 @@ double ExtendedDefaultPolicy::DetermineMultiplier() if (m_FoldableExpr > 0) { // E.g. add/mul/ceq, etc. over constant/constant arguments - multiplier += 1.0 + m_FoldableExpr; + multiplier += m_FoldableExpr; JITDUMP("\nInline has %d foldable binary expressions. Multiplier increased to %g.", m_FoldableExpr, multiplier); } @@ -1569,7 +1560,7 @@ double ExtendedDefaultPolicy::DetermineMultiplier() if (m_FoldableExprUn > 0) { // E.g. casts, negations, etc. over constants/constant arguments - multiplier += m_FoldableExprUn; + multiplier += m_FoldableExprUn * 0.5; JITDUMP("\nInline has %d foldable unary expressions. Multiplier increased to %g.", m_FoldableExprUn, multiplier); } @@ -1645,21 +1636,22 @@ double ExtendedDefaultPolicy::DetermineMultiplier() if (m_FoldableSwitch > 0) { - multiplier += m_FoldableSwitch * 5.0; - JITDUMP("\nInline candidate has %d foldable switches. Multiplier increased to %g.", m_FoldableSwitch, multiplier); + multiplier += 6.0; + JITDUMP("\nInline candidate has %d foldable switches. Multiplier increased to %g.", m_FoldableSwitch, + multiplier); } else if (m_Switch > 0) { if (m_IsPrejitRoot) { // Assume the switches can be foldable in PrejitRoot mode. - multiplier += m_Switch * 5.0; + multiplier += 6.0; JITDUMP("\nPrejit root candidate has %d switches. Multiplier increased to %g.", m_Switch, multiplier); } else { // TODO: Investigate cases where it makes sense to inline non-foldable switches - multiplier = 0; + multiplier = 0.0; JITDUMP("\nInline candidate has %d switches. Multiplier limited to %g.", m_Switch, multiplier); } } @@ -1685,17 +1677,16 @@ double ExtendedDefaultPolicy::DetermineMultiplier() { multiplier *= min(m_ProfileFrequency, 1.0) * profileScale; } - JITDUMP("\nCallsite has profile data: %g.", m_ProfileFrequency); + JITDUMP("\nCallsite has profile data: %g. Multiplier limited to %g.", m_ProfileFrequency, multiplier); } - if (JitConfig.JitExtDefaultPolicyMaxLclStep() > 0) + // Slow down if there are already too many locals + if (m_RootCompiler->lvaTableCnt > 50) { - const double lclLimitStep = JitConfig.JitMaxLocalsToTrack() / (double)JitConfig.JitExtDefaultPolicyMaxLclStep(); - if (m_RootCompiler->lvaTableCnt > lclLimitStep) - { - multiplier /= (m_RootCompiler->lvaTableCnt / lclLimitStep); - JITDUMP("\nCaller has %d locals. Multiplier decreased to %g.", m_RootCompiler->lvaTableCnt, multiplier); - } + // E.g. MaxLocalsToTrack = 1024 and lvaTableCnt = 512 -> multiplier *= 0.5; + const double lclFullness = min(1.0, (double)m_RootCompiler->lvaTableCnt / JitConfig.JitMaxLocalsToTrack()); + multiplier *= (1.0 - lclFullness); + JITDUMP("\nCaller has %d locals. Multiplier decreased to %g.", m_RootCompiler->lvaTableCnt, multiplier); } if (m_BackwardJump) diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 914e617c5345a9..2770c853a9f36a 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -460,9 +460,8 @@ CONFIG_STRING(JitInlineReplayFile, W("JitInlineReplayFile")) // Extended version of DefaultPolicy that includes a more precise IL scan, // relies on PGO if it exists and generally is more aggressive. CONFIG_INTEGER(JitExtDefaultPolicy, W("JitExtDefaultPolicy"), 1) -CONFIG_INTEGER(JitExtDefaultPolicyMaxLclStep, W("JitExtDefaultPolicyMaxLclStep"), 0xA) -CONFIG_INTEGER(JitExtDefaultPolicyMaxIL, W("JitExtDefaultPolicyMaxIL"), 0x400) -CONFIG_INTEGER(JitExtDefaultPolicyMaxBB, W("JitExtDefaultPolicyMaxBB"), 0xA) +CONFIG_INTEGER(JitExtDefaultPolicyMaxIL, W("JitExtDefaultPolicyMaxIL"), 0x80) // 128 +CONFIG_INTEGER(JitExtDefaultPolicyMaxILProf, W("JitExtDefaultPolicyMaxILProf"), 0x200) // 512 // Inliner uses the following formula for PGO-driven decisions: // @@ -473,7 +472,7 @@ CONFIG_INTEGER(JitExtDefaultPolicyMaxBB, W("JitExtDefaultPolicyMaxBB"), 0xA) // (except the cases where inlining in cold blocks improves type info/escape analysis for the whole caller). // For now, it's only applied for dynamic PGO. CONFIG_INTEGER(JitExtDefaultPolicyProfTrust, W("JitExtDefaultPolicyProfTrust"), 0x7) -CONFIG_INTEGER(JitExtDefaultPolicyProfScale, W("JitExtDefaultPolicyProfScale"), 0x2A) +CONFIG_INTEGER(JitExtDefaultPolicyProfScale, W("JitExtDefaultPolicyProfScale"), 0x2E) // 40 -> 4.0 CONFIG_INTEGER(JitInlinePolicyModel, W("JitInlinePolicyModel"), 0) CONFIG_INTEGER(JitInlinePolicyProfile, W("JitInlinePolicyProfile"), 0) From 4cb8387fbcca54eb5920b4280edfdf0c22644f80 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Mon, 12 Jul 2021 01:30:50 +0300 Subject: [PATCH 04/17] Fix asserts --- src/coreclr/jit/fgbasic.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index eee45e0564ba04..61c669ed431878 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -890,8 +890,11 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed if (makeInlineObservations) { + // Set default values for profile (to avoid NoteFailed in CALLEE_IL_CODE_SIZE's handler) + // these will be overridden later. + compInlineResult->NoteBool(InlineObservation::CALLSITE_HAS_PROFILE, true); + compInlineResult->NoteDouble(InlineObservation::CALLSITE_PROFILE_FREQUENCY, 1.0); // Observe force inline state and code size. - compInlineResult->NoteBool(InlineObservation::CALLSITE_HAS_PROFILE, fgHaveProfileData()); compInlineResult->NoteBool(InlineObservation::CALLEE_IS_FORCE_INLINE, isForceInline); compInlineResult->NoteInt(InlineObservation::CALLEE_IL_CODE_SIZE, codeSize); From 8117d8fb333cf00cbc1f01cd11f3bf0e719d8c94 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Mon, 12 Jul 2021 01:35:08 +0300 Subject: [PATCH 05/17] Clean up --- src/coreclr/jit/importer.cpp | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index f23f963ed0b42f..5f525afee926d4 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -19069,13 +19069,16 @@ void Compiler::impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, I inlineResult->NoteInt(InlineObservation::CALLSITE_FREQUENCY, static_cast(frequency)); inlineResult->NoteInt(InlineObservation::CALLSITE_WEIGHT, (int)(weight)); + bool hasProfile = false; + double profileFreq = 0.0; + // If the call site has profile data, report the relative frequency of the site. // if ((pInlineInfo != nullptr) && rootCompiler->fgHaveProfileData() && pInlineInfo->iciBlock->hasProfileWeight()) { const BasicBlock::weight_t callSiteWeight = pInlineInfo->iciBlock->bbWeight; const BasicBlock::weight_t entryWeight = rootCompiler->fgFirstBB->bbWeight; - const BasicBlock::weight_t profileFreq = entryWeight == 0.0f ? 0.0f : callSiteWeight / entryWeight; + profileFreq = entryWeight == 0.0f ? 0.0 : callSiteWeight / entryWeight; assert(callSiteWeight >= 0); assert(entryWeight >= 0); @@ -19086,16 +19089,19 @@ void Compiler::impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, I ((callSiteWeight + entryWeight) > sufficientSamples)) { // Let's not report profiles for methods with insufficient samples during prejitting. - inlineResult->NoteBool(InlineObservation::CALLSITE_HAS_PROFILE, true); + hasProfile = true; inlineResult->NoteDouble(InlineObservation::CALLSITE_PROFILE_FREQUENCY, profileFreq); } } else if ((pInlineInfo == nullptr) && rootCompiler->fgHaveProfileData()) { // Simulate a hot callsite for PrejitRoot mode. - inlineResult->NoteBool(InlineObservation::CALLSITE_HAS_PROFILE, true); - inlineResult->NoteDouble(InlineObservation::CALLSITE_PROFILE_FREQUENCY, 1.0); + hasProfile = true; + profileFreq = 1.0; } + + inlineResult->NoteBool(InlineObservation::CALLSITE_HAS_PROFILE, hasProfile); + inlineResult->NoteDouble(InlineObservation::CALLSITE_PROFILE_FREQUENCY, profileFreq); } /***************************************************************************** From 5a3effaa7b5437c6dc27a6387a0ca8f631a636b6 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Mon, 12 Jul 2021 08:18:20 +0300 Subject: [PATCH 06/17] Fix assert --- src/coreclr/jit/inlinepolicy.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/jit/inlinepolicy.cpp b/src/coreclr/jit/inlinepolicy.cpp index 926f0e26adb9f6..ae7e0ba434b887 100644 --- a/src/coreclr/jit/inlinepolicy.cpp +++ b/src/coreclr/jit/inlinepolicy.cpp @@ -1951,7 +1951,6 @@ void DiscretionaryPolicy::NoteDouble(InlineObservation obs, double value) { assert(obs == InlineObservation::CALLSITE_PROFILE_FREQUENCY); assert(value >= 0.0); - assert(m_ProfileFrequency == 0.0); m_ProfileFrequency = value; } From 40588ff447841c9e45d57d276f30d8b5e1d21d1d Mon Sep 17 00:00:00 2001 From: EgorBo Date: Mon, 12 Jul 2021 10:57:02 +0300 Subject: [PATCH 07/17] Tuning. --- src/coreclr/jit/importer.cpp | 1 - src/coreclr/jit/inlinepolicy.cpp | 18 ++++++++++++++++-- src/coreclr/jit/jitconfigvalues.h | 7 ++++--- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 5f525afee926d4..f2562e85fd2617 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -19090,7 +19090,6 @@ void Compiler::impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, I { // Let's not report profiles for methods with insufficient samples during prejitting. hasProfile = true; - inlineResult->NoteDouble(InlineObservation::CALLSITE_PROFILE_FREQUENCY, profileFreq); } } else if ((pInlineInfo == nullptr) && rootCompiler->fgHaveProfileData()) diff --git a/src/coreclr/jit/inlinepolicy.cpp b/src/coreclr/jit/inlinepolicy.cpp index ae7e0ba434b887..be717c7de2892b 100644 --- a/src/coreclr/jit/inlinepolicy.cpp +++ b/src/coreclr/jit/inlinepolicy.cpp @@ -1370,7 +1370,21 @@ void ExtendedDefaultPolicy::NoteInt(InlineObservation obs, int value) { SetNever(InlineObservation::CALLEE_DOES_NOT_RETURN); } - break; + else if (!m_IsForceInline && !m_HasProfile) + { + unsigned bbLimit = (unsigned)JitConfig.JitExtDefaultPolicyMaxBB(); + if (m_IsPrejitRoot) + { + // We're not able to recognize arg-specific foldable branches + // in prejit-root mode. + bbLimit += 5 + m_Switch * 10; + } + bbLimit += m_FoldableBranch * 2 + m_FoldableSwitch * 10; + if ((unsigned)value > bbLimit) + { + SetNever(InlineObservation::CALLEE_TOO_MANY_BASIC_BLOCKS); + } + } } default: DefaultPolicy::NoteInt(obs, value); @@ -1455,7 +1469,7 @@ double ExtendedDefaultPolicy::DetermineMultiplier() if (m_NonGenericCallsGeneric) { - multiplier += 1.5; + multiplier += 2.0; JITDUMP("\nInline candidate is generic and caller is not. Multiplier increased to %g.", multiplier); } diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index b600c46f2eb6d9..64a6ca80ec8dfa 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -463,8 +463,9 @@ CONFIG_STRING(JitInlineReplayFile, W("JitInlineReplayFile")) // Extended version of DefaultPolicy that includes a more precise IL scan, // relies on PGO if it exists and generally is more aggressive. CONFIG_INTEGER(JitExtDefaultPolicy, W("JitExtDefaultPolicy"), 1) -CONFIG_INTEGER(JitExtDefaultPolicyMaxIL, W("JitExtDefaultPolicyMaxIL"), 0x80) // 128 -CONFIG_INTEGER(JitExtDefaultPolicyMaxILProf, W("JitExtDefaultPolicyMaxILProf"), 0x200) // 512 +CONFIG_INTEGER(JitExtDefaultPolicyMaxIL, W("JitExtDefaultPolicyMaxIL"), 0x80) +CONFIG_INTEGER(JitExtDefaultPolicyMaxILProf, W("JitExtDefaultPolicyMaxILProf"), 0x200) +CONFIG_INTEGER(JitExtDefaultPolicyMaxBB, W("JitExtDefaultPolicyMaxBB"), 7) // Inliner uses the following formula for PGO-driven decisions: // @@ -475,7 +476,7 @@ CONFIG_INTEGER(JitExtDefaultPolicyMaxILProf, W("JitExtDefaultPolicyMaxILProf"), // (except the cases where inlining in cold blocks improves type info/escape analysis for the whole caller). // For now, it's only applied for dynamic PGO. CONFIG_INTEGER(JitExtDefaultPolicyProfTrust, W("JitExtDefaultPolicyProfTrust"), 0x7) -CONFIG_INTEGER(JitExtDefaultPolicyProfScale, W("JitExtDefaultPolicyProfScale"), 0x2E) // 40 -> 4.0 +CONFIG_INTEGER(JitExtDefaultPolicyProfScale, W("JitExtDefaultPolicyProfScale"), 0x2E) CONFIG_INTEGER(JitInlinePolicyModel, W("JitInlinePolicyModel"), 0) CONFIG_INTEGER(JitInlinePolicyProfile, W("JitInlinePolicyProfile"), 0) From e19f79f9506afc874ce331af91bb152b7e1fe845 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Mon, 12 Jul 2021 10:59:31 +0300 Subject: [PATCH 08/17] add missing break; --- src/coreclr/jit/inlinepolicy.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/inlinepolicy.cpp b/src/coreclr/jit/inlinepolicy.cpp index be717c7de2892b..c82fe6027645db 100644 --- a/src/coreclr/jit/inlinepolicy.cpp +++ b/src/coreclr/jit/inlinepolicy.cpp @@ -1385,6 +1385,7 @@ void ExtendedDefaultPolicy::NoteInt(InlineObservation obs, int value) SetNever(InlineObservation::CALLEE_TOO_MANY_BASIC_BLOCKS); } } + break; } default: DefaultPolicy::NoteInt(obs, value); From 2240f34fd6d31be93f486724730f2aded45ec3a7 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Mon, 12 Jul 2021 12:13:33 +0300 Subject: [PATCH 09/17] Tuning for size. --- src/coreclr/jit/compiler.cpp | 2 +- src/coreclr/jit/compiler.h | 3 ++- src/coreclr/jit/fgprofile.cpp | 25 +++++++++++++++++++++++++ src/coreclr/jit/importer.cpp | 16 ++++------------ src/coreclr/jit/inlinepolicy.cpp | 12 ++++++------ src/coreclr/jit/jitconfigvalues.h | 4 ++-- 6 files changed, 40 insertions(+), 22 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index e9e50c7b04b66d..fe5734213afcc3 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -6273,7 +6273,7 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr, InlineResult prejitResult(this, methodHnd, "prejit"); // Profile data allows us to avoid early "too many IL bytes" outs. - prejitResult.NoteBool(InlineObservation::CALLSITE_HAS_PROFILE, fgHaveProfileData()); + prejitResult.NoteBool(InlineObservation::CALLSITE_HAS_PROFILE, fgHaveSufficientProfileData()); // Do the initial inline screen. impCanInlineIL(methodHnd, methodInfo, forceInline, &prejitResult); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index d49a3f2ceea36b..9027a5d2335298 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5777,6 +5777,7 @@ class Compiler #endif } + bool fgHaveProfileData(); bool fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::weight_t* weight); Instrumentor* fgCountInstrumentor; @@ -5813,7 +5814,7 @@ class Compiler void WalkSpanningTree(SpanningTreeVisitor* visitor); void fgSetProfileWeight(BasicBlock* block, BasicBlock::weight_t weight); void fgApplyProfileScale(); - bool fgHaveProfileData(); + bool fgHaveSufficientProfileData(); // fgIsUsingProfileWeights - returns true if we have real profile data for this method // or if we have some fake profile data for the stress mode diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 1659b2d3905293..31d7c208ebaf01 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -47,6 +47,31 @@ bool Compiler::fgHaveProfileData() return (fgPgoSchema != nullptr); } +//------------------------------------------------------------------------ +// fgHaveSufficientProfileData: check if profile data is available +// and is sufficient enough to be trustful. +// +// Returns: +// true if so +// +// Note: +// See notes for fgHaveProfileData. +// +bool Compiler::fgHaveSufficientProfileData() +{ + if (!fgHaveProfileData()) + { + return false; + } + + if ((fgFirstBB != nullptr) && (fgPgoSource == ICorJitInfo::PgoSource::Static)) + { + const BasicBlock::weight_t sufficientSamples = 1000; + return fgFirstBB->bbWeight > sufficientSamples; + } + return true; +} + //------------------------------------------------------------------------ // fgApplyProfileScale: scale inlinee counts by appropriate scale factor // diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index f2562e85fd2617..4507c032806df0 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -19074,25 +19074,17 @@ void Compiler::impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, I // If the call site has profile data, report the relative frequency of the site. // - if ((pInlineInfo != nullptr) && rootCompiler->fgHaveProfileData() && pInlineInfo->iciBlock->hasProfileWeight()) + if ((pInlineInfo != nullptr) && rootCompiler->fgHaveSufficientProfileData()) { const BasicBlock::weight_t callSiteWeight = pInlineInfo->iciBlock->bbWeight; const BasicBlock::weight_t entryWeight = rootCompiler->fgFirstBB->bbWeight; profileFreq = entryWeight == 0.0f ? 0.0 : callSiteWeight / entryWeight; + hasProfile = true; assert(callSiteWeight >= 0); assert(entryWeight >= 0); - - const BasicBlock::weight_t sufficientSamples = 1000.0f; - - if ((rootCompiler->fgPgoSource != ICorJitInfo::PgoSource::Static) || - ((callSiteWeight + entryWeight) > sufficientSamples)) - { - // Let's not report profiles for methods with insufficient samples during prejitting. - hasProfile = true; - } } - else if ((pInlineInfo == nullptr) && rootCompiler->fgHaveProfileData()) + else if (pInlineInfo == nullptr) { // Simulate a hot callsite for PrejitRoot mode. hasProfile = true; @@ -19247,7 +19239,7 @@ void Compiler::impCheckCanInline(GenTreeCall* call, } // Profile data allows us to avoid early "too many IL bytes" outs. - pParam->result->NoteBool(InlineObservation::CALLSITE_HAS_PROFILE, pParam->pThis->fgHaveProfileData()); + pParam->result->NoteBool(InlineObservation::CALLSITE_HAS_PROFILE, pParam->pThis->fgHaveSufficientProfileData()); bool forceInline; forceInline = !!(pParam->methAttr & CORINFO_FLG_FORCEINLINE); diff --git a/src/coreclr/jit/inlinepolicy.cpp b/src/coreclr/jit/inlinepolicy.cpp index c82fe6027645db..9d349f270b7f14 100644 --- a/src/coreclr/jit/inlinepolicy.cpp +++ b/src/coreclr/jit/inlinepolicy.cpp @@ -1379,7 +1379,7 @@ void ExtendedDefaultPolicy::NoteInt(InlineObservation obs, int value) // in prejit-root mode. bbLimit += 5 + m_Switch * 10; } - bbLimit += m_FoldableBranch * 2 + m_FoldableSwitch * 10; + bbLimit += m_FoldableBranch + m_FoldableSwitch * 10; if ((unsigned)value > bbLimit) { SetNever(InlineObservation::CALLEE_TOO_MANY_BASIC_BLOCKS); @@ -1470,7 +1470,7 @@ double ExtendedDefaultPolicy::DetermineMultiplier() if (m_NonGenericCallsGeneric) { - multiplier += 2.0; + multiplier += 1.5; JITDUMP("\nInline candidate is generic and caller is not. Multiplier increased to %g.", multiplier); } @@ -1484,7 +1484,7 @@ double ExtendedDefaultPolicy::DetermineMultiplier() // if (Math.Abs(arg0) > 10) { // same here // etc. // - multiplier += 3.0 + m_FoldableBranch; + multiplier += 2.0 + m_FoldableBranch; JITDUMP("\nInline candidate has %d foldable branches. Multiplier increased to %g.", m_FoldableBranch, multiplier); } @@ -1520,7 +1520,7 @@ double ExtendedDefaultPolicy::DetermineMultiplier() if (m_Intrinsic > 0) { // In most cases such intrinsics are lowered as single CPU instructions - multiplier += 1.0 + m_Intrinsic * 0.3; + multiplier += 1.0 + m_Intrinsic * 0.2; JITDUMP("\nInline has %d intrinsics. Multiplier increased to %g.", m_Intrinsic, multiplier); } @@ -1547,7 +1547,7 @@ double ExtendedDefaultPolicy::DetermineMultiplier() // // int Caller(string s) => Callee(s); // String is 'exact' (sealed) // - multiplier += 2.0; + multiplier += 1.5; JITDUMP("\nCallsite passes %d arguments of exact classes while callee accepts non-exact ones. Multiplier " "increased to %g.", m_ArgIsExactClsSigIsNot, multiplier); @@ -1696,7 +1696,7 @@ double ExtendedDefaultPolicy::DetermineMultiplier() } // Slow down if there are already too many locals - if (m_RootCompiler->lvaTableCnt > 50) + if (m_RootCompiler->lvaTableCnt > 16) { // E.g. MaxLocalsToTrack = 1024 and lvaTableCnt = 512 -> multiplier *= 0.5; const double lclFullness = min(1.0, (double)m_RootCompiler->lvaTableCnt / JitConfig.JitMaxLocalsToTrack()); diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 64a6ca80ec8dfa..d9a1c71bed4e61 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -463,7 +463,7 @@ CONFIG_STRING(JitInlineReplayFile, W("JitInlineReplayFile")) // Extended version of DefaultPolicy that includes a more precise IL scan, // relies on PGO if it exists and generally is more aggressive. CONFIG_INTEGER(JitExtDefaultPolicy, W("JitExtDefaultPolicy"), 1) -CONFIG_INTEGER(JitExtDefaultPolicyMaxIL, W("JitExtDefaultPolicyMaxIL"), 0x80) +CONFIG_INTEGER(JitExtDefaultPolicyMaxIL, W("JitExtDefaultPolicyMaxIL"), 0x78) CONFIG_INTEGER(JitExtDefaultPolicyMaxILProf, W("JitExtDefaultPolicyMaxILProf"), 0x200) CONFIG_INTEGER(JitExtDefaultPolicyMaxBB, W("JitExtDefaultPolicyMaxBB"), 7) @@ -476,7 +476,7 @@ CONFIG_INTEGER(JitExtDefaultPolicyMaxBB, W("JitExtDefaultPolicyMaxBB"), 7) // (except the cases where inlining in cold blocks improves type info/escape analysis for the whole caller). // For now, it's only applied for dynamic PGO. CONFIG_INTEGER(JitExtDefaultPolicyProfTrust, W("JitExtDefaultPolicyProfTrust"), 0x7) -CONFIG_INTEGER(JitExtDefaultPolicyProfScale, W("JitExtDefaultPolicyProfScale"), 0x2E) +CONFIG_INTEGER(JitExtDefaultPolicyProfScale, W("JitExtDefaultPolicyProfScale"), 0x2A) CONFIG_INTEGER(JitInlinePolicyModel, W("JitInlinePolicyModel"), 0) CONFIG_INTEGER(JitInlinePolicyProfile, W("JitInlinePolicyProfile"), 0) From d8f4c4594ec9346c48ab71ccae6975e201c0afd0 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Mon, 12 Jul 2021 12:28:40 +0300 Subject: [PATCH 10/17] Tuning. --- src/coreclr/jit/jitconfigvalues.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index d9a1c71bed4e61..252ea35be38d12 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -463,7 +463,7 @@ CONFIG_STRING(JitInlineReplayFile, W("JitInlineReplayFile")) // Extended version of DefaultPolicy that includes a more precise IL scan, // relies on PGO if it exists and generally is more aggressive. CONFIG_INTEGER(JitExtDefaultPolicy, W("JitExtDefaultPolicy"), 1) -CONFIG_INTEGER(JitExtDefaultPolicyMaxIL, W("JitExtDefaultPolicyMaxIL"), 0x78) +CONFIG_INTEGER(JitExtDefaultPolicyMaxIL, W("JitExtDefaultPolicyMaxIL"), 0x80) CONFIG_INTEGER(JitExtDefaultPolicyMaxILProf, W("JitExtDefaultPolicyMaxILProf"), 0x200) CONFIG_INTEGER(JitExtDefaultPolicyMaxBB, W("JitExtDefaultPolicyMaxBB"), 7) @@ -476,7 +476,7 @@ CONFIG_INTEGER(JitExtDefaultPolicyMaxBB, W("JitExtDefaultPolicyMaxBB"), 7) // (except the cases where inlining in cold blocks improves type info/escape analysis for the whole caller). // For now, it's only applied for dynamic PGO. CONFIG_INTEGER(JitExtDefaultPolicyProfTrust, W("JitExtDefaultPolicyProfTrust"), 0x7) -CONFIG_INTEGER(JitExtDefaultPolicyProfScale, W("JitExtDefaultPolicyProfScale"), 0x2A) +CONFIG_INTEGER(JitExtDefaultPolicyProfScale, W("JitExtDefaultPolicyProfScale"), 0x2D) CONFIG_INTEGER(JitInlinePolicyModel, W("JitInlinePolicyModel"), 0) CONFIG_INTEGER(JitInlinePolicyProfile, W("JitInlinePolicyProfile"), 0) From f6e7c1397e533c15f09177e1760919a8f131c474 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Mon, 12 Jul 2021 12:44:55 +0300 Subject: [PATCH 11/17] Formatting --- src/coreclr/jit/fgprofile.cpp | 2 +- src/coreclr/jit/importer.cpp | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 31d7c208ebaf01..b2deeb6bf02bc1 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -66,7 +66,7 @@ bool Compiler::fgHaveSufficientProfileData() if ((fgFirstBB != nullptr) && (fgPgoSource == ICorJitInfo::PgoSource::Static)) { - const BasicBlock::weight_t sufficientSamples = 1000; + const BasicBlock::weight_t sufficientSamples = 5000; return fgFirstBB->bbWeight > sufficientSamples; } return true; diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 4507c032806df0..55a8063ac7ce3c 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -19239,7 +19239,8 @@ void Compiler::impCheckCanInline(GenTreeCall* call, } // Profile data allows us to avoid early "too many IL bytes" outs. - pParam->result->NoteBool(InlineObservation::CALLSITE_HAS_PROFILE, pParam->pThis->fgHaveSufficientProfileData()); + pParam->result->NoteBool(InlineObservation::CALLSITE_HAS_PROFILE, + pParam->pThis->fgHaveSufficientProfileData()); bool forceInline; forceInline = !!(pParam->methAttr & CORINFO_FLG_FORCEINLINE); From 970a2cd4f9a3843c52546cb6c3e126fc46ef8f17 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Mon, 12 Jul 2021 12:55:30 +0300 Subject: [PATCH 12/17] Tuning. --- src/coreclr/jit/jitconfigvalues.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 252ea35be38d12..049f26bb1a8e35 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -476,7 +476,7 @@ CONFIG_INTEGER(JitExtDefaultPolicyMaxBB, W("JitExtDefaultPolicyMaxBB"), 7) // (except the cases where inlining in cold blocks improves type info/escape analysis for the whole caller). // For now, it's only applied for dynamic PGO. CONFIG_INTEGER(JitExtDefaultPolicyProfTrust, W("JitExtDefaultPolicyProfTrust"), 0x7) -CONFIG_INTEGER(JitExtDefaultPolicyProfScale, W("JitExtDefaultPolicyProfScale"), 0x2D) +CONFIG_INTEGER(JitExtDefaultPolicyProfScale, W("JitExtDefaultPolicyProfScale"), 0x2A) CONFIG_INTEGER(JitInlinePolicyModel, W("JitInlinePolicyModel"), 0) CONFIG_INTEGER(JitInlinePolicyProfile, W("JitInlinePolicyProfile"), 0) From a7ecdf60ad6d40d8e4e8e7970d42a4187573c33a Mon Sep 17 00:00:00 2001 From: EgorBo Date: Mon, 12 Jul 2021 16:47:54 +0300 Subject: [PATCH 13/17] Revert some changes --- src/coreclr/jit/inlinepolicy.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/inlinepolicy.cpp b/src/coreclr/jit/inlinepolicy.cpp index 9d349f270b7f14..29f1cf329c5ada 100644 --- a/src/coreclr/jit/inlinepolicy.cpp +++ b/src/coreclr/jit/inlinepolicy.cpp @@ -1484,7 +1484,7 @@ double ExtendedDefaultPolicy::DetermineMultiplier() // if (Math.Abs(arg0) > 10) { // same here // etc. // - multiplier += 2.0 + m_FoldableBranch; + multiplier += 3.0 + m_FoldableBranch; JITDUMP("\nInline candidate has %d foldable branches. Multiplier increased to %g.", m_FoldableBranch, multiplier); } @@ -1520,7 +1520,7 @@ double ExtendedDefaultPolicy::DetermineMultiplier() if (m_Intrinsic > 0) { // In most cases such intrinsics are lowered as single CPU instructions - multiplier += 1.0 + m_Intrinsic * 0.2; + multiplier += 1.0 + m_Intrinsic * 0.3; JITDUMP("\nInline has %d intrinsics. Multiplier increased to %g.", m_Intrinsic, multiplier); } @@ -1547,7 +1547,7 @@ double ExtendedDefaultPolicy::DetermineMultiplier() // // int Caller(string s) => Callee(s); // String is 'exact' (sealed) // - multiplier += 1.5; + multiplier += 2.5; JITDUMP("\nCallsite passes %d arguments of exact classes while callee accepts non-exact ones. Multiplier " "increased to %g.", m_ArgIsExactClsSigIsNot, multiplier); @@ -1567,7 +1567,7 @@ double ExtendedDefaultPolicy::DetermineMultiplier() if (m_FoldableExpr > 0) { // E.g. add/mul/ceq, etc. over constant/constant arguments - multiplier += m_FoldableExpr; + multiplier += 1.0 + m_FoldableExpr; JITDUMP("\nInline has %d foldable binary expressions. Multiplier increased to %g.", m_FoldableExpr, multiplier); } @@ -1575,7 +1575,7 @@ double ExtendedDefaultPolicy::DetermineMultiplier() if (m_FoldableExprUn > 0) { // E.g. casts, negations, etc. over constants/constant arguments - multiplier += m_FoldableExprUn * 0.5; + multiplier += m_FoldableExprUn; JITDUMP("\nInline has %d foldable unary expressions. Multiplier increased to %g.", m_FoldableExprUn, multiplier); } @@ -1696,7 +1696,7 @@ double ExtendedDefaultPolicy::DetermineMultiplier() } // Slow down if there are already too many locals - if (m_RootCompiler->lvaTableCnt > 16) + if (m_RootCompiler->lvaTableCnt > 64) { // E.g. MaxLocalsToTrack = 1024 and lvaTableCnt = 512 -> multiplier *= 0.5; const double lclFullness = min(1.0, (double)m_RootCompiler->lvaTableCnt / JitConfig.JitMaxLocalsToTrack()); From db0f3bcf7ee8e4957776fde3fd791ef92419c12a Mon Sep 17 00:00:00 2001 From: EgorBo Date: Mon, 12 Jul 2021 23:31:44 +0300 Subject: [PATCH 14/17] Fix cases where "arg op cns" left "cns" on top of the pushedstack -- it produced many false-positive foldable switches/branches. --- src/coreclr/jit/fgbasic.cpp | 3 ++- src/coreclr/jit/fgprofile.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 61c669ed431878..3d12b84b0e5080 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -1388,6 +1388,7 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed else if (FgStack::IsArgument(arg0) && FgStack::IsConstantOrConstArg(arg1, impInlineInfo)) { // "Arg op CNS" --> keep arg0 in the stack for the next ops + pushedStack.Push(arg0); handled = true; compInlineResult->Note(InlineObservation::CALLEE_BINARY_EXRP_WITH_CNS); } @@ -1396,7 +1397,6 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed else if (FgStack::IsArgument(arg1) && FgStack::IsConstantOrConstArg(arg0, impInlineInfo)) { // "CNS op ARG" --> keep arg1 in the stack for the next ops - pushedStack.Push(arg1); handled = true; compInlineResult->Note(InlineObservation::CALLEE_BINARY_EXRP_WITH_CNS); } @@ -1409,6 +1409,7 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed { compInlineResult->Note(InlineObservation::CALLSITE_DIV_BY_CNS); } + pushedStack.Push(arg0); handled = true; } } diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index b2deeb6bf02bc1..31d7c208ebaf01 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -66,7 +66,7 @@ bool Compiler::fgHaveSufficientProfileData() if ((fgFirstBB != nullptr) && (fgPgoSource == ICorJitInfo::PgoSource::Static)) { - const BasicBlock::weight_t sufficientSamples = 5000; + const BasicBlock::weight_t sufficientSamples = 1000; return fgFirstBB->bbWeight > sufficientSamples; } return true; From 4d0bf98071f403f5b6d5edc174f34d20ffc3eb18 Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 13 Jul 2021 01:26:03 +0300 Subject: [PATCH 15/17] Fix an assert when we import a switch (in jitdump mode) --- src/coreclr/jit/block.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/block.cpp b/src/coreclr/jit/block.cpp index 128c4e033d9fa5..5df8c1e4358c23 100644 --- a/src/coreclr/jit/block.cpp +++ b/src/coreclr/jit/block.cpp @@ -1431,6 +1431,7 @@ BasicBlock* Compiler::bbNewBasicBlock(BBjumpKinds jumpKind) /* Give the block a number, set the ancestor count and weight */ ++fgBBcount; + ++fgBBNumMax; if (compIsForInlining()) { @@ -1438,7 +1439,7 @@ BasicBlock* Compiler::bbNewBasicBlock(BBjumpKinds jumpKind) } else { - block->bbNum = ++fgBBNumMax; + block->bbNum = fgBBNumMax; } if (compRationalIRForm) From e948d104d1ab06f4261406126614fc0110aefe2a Mon Sep 17 00:00:00 2001 From: EgorBo Date: Tue, 13 Jul 2021 16:43:11 +0300 Subject: [PATCH 16/17] Disable over-ILsize for generic static pgo --- src/coreclr/jit/inlinepolicy.cpp | 11 ++++++----- src/coreclr/jit/jitconfigvalues.h | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/inlinepolicy.cpp b/src/coreclr/jit/inlinepolicy.cpp index 29f1cf329c5ada..e84ff2858a011d 100644 --- a/src/coreclr/jit/inlinepolicy.cpp +++ b/src/coreclr/jit/inlinepolicy.cpp @@ -1337,7 +1337,8 @@ void ExtendedDefaultPolicy::NoteInt(InlineObservation obs, int value) m_CodeSize = static_cast(value); unsigned maxCodeSize = static_cast(JitConfig.JitExtDefaultPolicyMaxIL()); - if (m_HasProfile) + // TODO: Enable for PgoSource::Static as well if it's not the generic profile we bundle. + if (m_HasProfile && (m_RootCompiler->fgPgoSource == ICorJitInfo::PgoSource::Dynamic)) { maxCodeSize = static_cast(JitConfig.JitExtDefaultPolicyMaxILProf()); } @@ -1432,13 +1433,13 @@ double ExtendedDefaultPolicy::DetermineMultiplier() if (m_ReturnsStructByValue) { // For structs-passed-by-value we might avoid expensive copy operations if we inline. - multiplier += 1.5; + multiplier += 2.0; JITDUMP("\nInline candidate returns a struct by value. Multiplier increased to %g.", multiplier); } else if (m_ArgIsStructByValue > 0) { // Same here - multiplier += 1.5; + multiplier += 2.0; JITDUMP("\n%d arguments are structs passed by value. Multiplier increased to %g.", m_ArgIsStructByValue, multiplier); } @@ -1464,13 +1465,13 @@ double ExtendedDefaultPolicy::DetermineMultiplier() if (m_ArgFeedsRangeCheck > 0) { - multiplier += 0.5; + multiplier += 1.0; JITDUMP("\nInline candidate has arg that feeds range check. Multiplier increased to %g.", multiplier); } if (m_NonGenericCallsGeneric) { - multiplier += 1.5; + multiplier += 2.0; JITDUMP("\nInline candidate is generic and caller is not. Multiplier increased to %g.", multiplier); } diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 049f26bb1a8e35..c1a841bfa787e3 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -464,7 +464,7 @@ CONFIG_STRING(JitInlineReplayFile, W("JitInlineReplayFile")) // relies on PGO if it exists and generally is more aggressive. CONFIG_INTEGER(JitExtDefaultPolicy, W("JitExtDefaultPolicy"), 1) CONFIG_INTEGER(JitExtDefaultPolicyMaxIL, W("JitExtDefaultPolicyMaxIL"), 0x80) -CONFIG_INTEGER(JitExtDefaultPolicyMaxILProf, W("JitExtDefaultPolicyMaxILProf"), 0x200) +CONFIG_INTEGER(JitExtDefaultPolicyMaxILProf, W("JitExtDefaultPolicyMaxILProf"), 0x400) CONFIG_INTEGER(JitExtDefaultPolicyMaxBB, W("JitExtDefaultPolicyMaxBB"), 7) // Inliner uses the following formula for PGO-driven decisions: From ab9f54241665f8c4b91a3e9442c8779a975b508d Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Tue, 13 Jul 2021 20:25:13 +0300 Subject: [PATCH 17/17] Update fgbasic.cpp --- src/coreclr/jit/fgbasic.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 3d12b84b0e5080..6c65586fbca1c8 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -883,10 +883,12 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed const bool isForceInline = (info.compFlags & CORINFO_FLG_FORCEINLINE) != 0; const bool makeInlineObservations = (compInlineResult != nullptr); const bool isInlining = compIsForInlining(); + const bool isPreJit = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT); + const bool isTier1 = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1); unsigned retBlocks = 0; int prefixFlags = 0; bool preciseScan = makeInlineObservations && compInlineResult->GetPolicy()->RequiresPreciseScan(); - const bool resolveTokens = preciseScan; + const bool resolveTokens = preciseScan && (isPreJit || isTier1); if (makeInlineObservations) {