Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Tuning.
  • Loading branch information
EgorBo committed Jul 11, 2021
commit 6dd92ca3ae7b1636206ee27afc1dcc32e1e52f34
3 changes: 3 additions & 0 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6266,6 +6266,9 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
// a potential inline candidate.
InlineResult prejitResult(this, methodHnd, "prejit");

// Profile data allows us to avoid early "too many IL bytes" outs.
prejitResult.NoteBool(InlineObservation::CALLSITE_HAS_PROFILE, fgHaveProfileData());

// Do the initial inline screen.
impCanInlineIL(methodHnd, methodInfo, forceInline, &prejitResult);

Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -5777,7 +5777,6 @@ class Compiler
#endif
}

bool fgHaveProfileData();
bool fgGetProfileWeightForBasicBlock(IL_OFFSET offset, BasicBlock::weight_t* weight);

Instrumentor* fgCountInstrumentor;
Expand Down Expand Up @@ -5814,6 +5813,7 @@ class Compiler
void WalkSpanningTree(SpanningTreeVisitor* visitor);
void fgSetProfileWeight(BasicBlock* block, BasicBlock::weight_t weight);
void fgApplyProfileScale();
bool fgHaveProfileData();

// fgIsUsingProfileWeights - returns true if we have real profile data for this method
// or if we have some fake profile data for the stress mode
Expand Down
62 changes: 43 additions & 19 deletions src/coreclr/jit/fgbasic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -818,8 +818,24 @@ class FgStack
return false;
}
const unsigned argNum = value - SLOT_ARGUMENT;
assert(argNum < info->argCnt);
return info->inlArgInfo[argNum].argIsInvariant;
if (argNum < info->argCnt)
{
return info->inlArgInfo[argNum].argIsInvariant;
}
return false;
}
// IsExactArgument: does this stack slot refer to an inlinee argument that
// the call site recorded as "exact" (InlArgInfo::argIsExact)?
//
// Arguments:
//    value - stack slot to classify
//    info  - inline info for the current inline attempt; may be nullptr
//
// Return Value:
//    The argIsExact flag of the matching argument record. False when there
//    is no inline info, when the slot is not an argument slot, or when the
//    decoded argument index is not below info->argCnt.
//
static bool IsExactArgument(FgSlot value, InlineInfo* info)
{
    if ((info != nullptr) && IsArgument(value))
    {
        // Argument slots are encoded as SLOT_ARGUMENT + argument index.
        const unsigned argIndex = value - SLOT_ARGUMENT;
        if (argIndex < info->argCnt)
        {
            return info->inlArgInfo[argIndex].argIsExact;
        }
    }
    return false;
}
static unsigned SlotTypeToArgNum(FgSlot value)
{
Expand Down Expand Up @@ -867,16 +883,15 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
const bool isForceInline = (info.compFlags & CORINFO_FLG_FORCEINLINE) != 0;
const bool makeInlineObservations = (compInlineResult != nullptr);
const bool isInlining = compIsForInlining();
const bool isPreJit = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT);
const bool isTier1 = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1);
unsigned retBlocks = 0;
int prefixFlags = 0;
bool preciseScan = makeInlineObservations && compInlineResult->GetPolicy()->RequiresPreciseScan();
const bool resolveTokens = preciseScan && (isPreJit || isTier1);
const bool resolveTokens = preciseScan;

if (makeInlineObservations)
{
// Observe force inline state and code size.
compInlineResult->NoteBool(InlineObservation::CALLSITE_HAS_PROFILE, fgHaveProfileData());
compInlineResult->NoteBool(InlineObservation::CALLEE_IS_FORCE_INLINE, isForceInline);
compInlineResult->NoteInt(InlineObservation::CALLEE_IL_CODE_SIZE, codeSize);

Expand Down Expand Up @@ -1031,7 +1046,8 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
if (makeInlineObservations)
{
FgStack::FgSlot slot = pushedStack.Top();
if (FgStack::IsConstantOrConstArg(slot, impInlineInfo))
if (FgStack::IsConstantOrConstArg(slot, impInlineInfo) ||
FgStack::IsExactArgument(slot, impInlineInfo))
{
compInlineResult->Note(InlineObservation::CALLSITE_FOLDABLE_EXPR_UN);
handled = true; // and keep argument in the pushedStack
Expand Down Expand Up @@ -1338,45 +1354,53 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
FgStack::FgSlot arg0 = pushedStack.Top(1);
FgStack::FgSlot arg1 = pushedStack.Top(0);

if ((FgStack::IsConstant(arg0) && FgStack::IsConstArgument(arg1, impInlineInfo)) ||
(FgStack::IsConstant(arg1) && FgStack::IsConstArgument(arg0, impInlineInfo)) ||
(FgStack::IsConstArgument(arg0, impInlineInfo) &&
FgStack::IsConstArgument(arg1, impInlineInfo)))
// Const op ConstArg -> ConstArg
if (FgStack::IsConstant(arg0) && FgStack::IsConstArgument(arg1, impInlineInfo))
{
// keep stack unchanged
handled = true;
compInlineResult->Note(InlineObservation::CALLSITE_FOLDABLE_EXPR);
}
else if ((FgStack::IsConstant(arg0) && FgStack::IsConstant(arg1)) ||
(FgStack::IsConstant(arg1) && FgStack::IsConstant(arg0)))
// ConstArg op Const -> ConstArg
// ConstArg op ConstArg -> ConstArg
else if (FgStack::IsConstArgument(arg0, impInlineInfo) &&
FgStack::IsConstantOrConstArg(arg1, impInlineInfo))
{
// both are constants, but we're mostly interested in cases where a const arg leads to
// a foldable expression.
if (FgStack::IsConstant(arg1))
{
pushedStack.Push(arg0);
}
handled = true;
compInlineResult->Note(InlineObservation::CALLSITE_FOLDABLE_EXPR);
}
else if ((FgStack::IsArgument(arg0) == FgStack::IsArgument(arg1)) && (arg0 == arg1))
// Const op Const -> Const
else if (FgStack::IsConstant(arg0) && FgStack::IsConstant(arg1))
{
// Both args are the same
// both are constants, but we're mostly interested in cases where a const arg leads to
// a foldable expression.
handled = true;
compInlineResult->Note(InlineObservation::CALLSITE_FOLDABLE_EXPR);
}
// Arg op ConstArg
// Arg op Const
else if (FgStack::IsArgument(arg0) && FgStack::IsConstantOrConstArg(arg1, impInlineInfo))
{
// "Arg op CNS" --> keep arg0 in the stack for the next ops
handled = true;
compInlineResult->Note(InlineObservation::CALLEE_BINARY_EXRP_WITH_CNS);
}
// ConstArg op Arg
// Const op Arg
else if (FgStack::IsArgument(arg1) && FgStack::IsConstantOrConstArg(arg0, impInlineInfo))
{
// "CNS op ARG" --> keep arg1 in the stack for the next ops
pushedStack.Push(arg1);
handled = true;
compInlineResult->Note(InlineObservation::CALLEE_BINARY_EXRP_WITH_CNS);
}

// X / ConstArg
// X % ConstArg
if (FgStack::IsConstArgument(arg1, impInlineInfo))
{
// Special case: "X / ConstArg" or "X % ConstArg"
if ((opcode == CEE_DIV) || (opcode == CEE_DIV_UN) || (opcode == CEE_REM) ||
(opcode == CEE_REM_UN))
{
Expand Down
17 changes: 12 additions & 5 deletions src/coreclr/jit/importer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19073,16 +19073,16 @@ void Compiler::impMakeDiscretionaryInlineObservations(InlineInfo* pInlineInfo, I
//
if ((pInlineInfo != nullptr) && rootCompiler->fgHaveProfileData() && pInlineInfo->iciBlock->hasProfileWeight())
{
BasicBlock::weight_t callSiteWeight = pInlineInfo->iciBlock->bbWeight;
BasicBlock::weight_t entryWeight = rootCompiler->fgFirstBB->bbWeight;
BasicBlock::weight_t profileFreq = entryWeight == 0.0f ? 0.0f : callSiteWeight / entryWeight;
const BasicBlock::weight_t callSiteWeight = pInlineInfo->iciBlock->bbWeight;
const BasicBlock::weight_t entryWeight = rootCompiler->fgFirstBB->bbWeight;
const BasicBlock::weight_t profileFreq = entryWeight == 0.0f ? 0.0f : callSiteWeight / entryWeight;

assert(callSiteWeight >= 0);
assert(entryWeight >= 0);

BasicBlock::weight_t sufficientSamples = 5000.0f;
const BasicBlock::weight_t sufficientSamples = 1000.0f;

if (!rootCompiler->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT) ||
if ((rootCompiler->fgPgoSource != ICorJitInfo::PgoSource::Static) ||
((callSiteWeight + entryWeight) > sufficientSamples))
{
// Let's not report profiles for methods with insufficient samples during prejitting.
Expand Down Expand Up @@ -19241,6 +19241,9 @@ void Compiler::impCheckCanInline(GenTreeCall* call,
goto _exit;
}

// Profile data allows us to avoid early "too many IL bytes" outs.
pParam->result->NoteBool(InlineObservation::CALLSITE_HAS_PROFILE, pParam->pThis->fgHaveProfileData());

bool forceInline;
forceInline = !!(pParam->methAttr & CORINFO_FLG_FORCEINLINE);

Expand Down Expand Up @@ -19463,6 +19466,10 @@ void Compiler::impInlineRecordArgInfo(InlineInfo* pInlineInfo,
}
}

bool isExact = false;
bool isNonNull = false;
inlCurArgInfo->argIsExact = (gtGetClassHandle(curArgVal, &isExact, &isNonNull) != NO_CLASS_HANDLE) && isExact;

// If the arg is a local that is address-taken, we can't safely
// directly substitute it into the inlinee.
//
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/inline.h
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,7 @@ struct InlArgInfo
unsigned argHasStargOp : 1; // Is there STARG(s) operation on this argument?
unsigned argIsByRefToStructLocal : 1; // Is this arg an address of a struct local or a normed struct local or a
// field in them?
unsigned argIsExact : 1; // Is this arg of an exact class?
};

// InlLclVarInfo describes inline candidate argument and local variable properties.
Expand Down
61 changes: 26 additions & 35 deletions src/coreclr/jit/inlinepolicy.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1334,7 +1334,13 @@ void ExtendedDefaultPolicy::NoteInt(InlineObservation obs, int value)
{
assert(m_IsForceInlineKnown);
assert(value != 0);
m_CodeSize = static_cast<unsigned>(value);
m_CodeSize = static_cast<unsigned>(value);
unsigned maxCodeSize = static_cast<unsigned>(JitConfig.JitExtDefaultPolicyMaxIL());

if (m_HasProfile)
{
maxCodeSize = static_cast<unsigned>(JitConfig.JitExtDefaultPolicyMaxILProf());
}

if (m_IsForceInline)
{
Expand All @@ -1346,7 +1352,7 @@ void ExtendedDefaultPolicy::NoteInt(InlineObservation obs, int value)
// Candidate based on small size
SetCandidate(InlineObservation::CALLEE_BELOW_ALWAYS_INLINE_SIZE);
}
else if (m_CodeSize <= (unsigned)JitConfig.JitExtDefaultPolicyMaxIL())
else if (m_CodeSize <= maxCodeSize)
{
// Candidate, pending profitability evaluation
SetCandidate(InlineObservation::CALLEE_IS_DISCRETIONARY_INLINE);
Expand All @@ -1364,21 +1370,6 @@ void ExtendedDefaultPolicy::NoteInt(InlineObservation obs, int value)
{
SetNever(InlineObservation::CALLEE_DOES_NOT_RETURN);
}
else if (!m_IsForceInline)
{
unsigned bbLimit = (unsigned)JitConfig.JitExtDefaultPolicyMaxBB();
if (m_IsPrejitRoot)
{
// We're not able to recognize arg-specific foldable branches
// in prejit-root mode.
bbLimit += 5 + m_Switch * 10;
}
bbLimit += m_FoldableBranch * 2 + m_FoldableSwitch * 10;
if ((unsigned)value > bbLimit)
{
SetNever(InlineObservation::CALLEE_TOO_MANY_BASIC_BLOCKS);
}
}
break;
}
default:
Expand Down Expand Up @@ -1426,13 +1417,13 @@ double ExtendedDefaultPolicy::DetermineMultiplier()
if (m_ReturnsStructByValue)
{
// For structs-passed-by-value we might avoid expensive copy operations if we inline.
multiplier += 2.0;
multiplier += 1.5;
JITDUMP("\nInline candidate returns a struct by value. Multiplier increased to %g.", multiplier);
}
else if (m_ArgIsStructByValue > 0)
{
// Same here
multiplier += 2.0;
multiplier += 1.5;
JITDUMP("\n%d arguments are structs passed by value. Multiplier increased to %g.", m_ArgIsStructByValue,
multiplier);
}
Expand Down Expand Up @@ -1514,7 +1505,7 @@ double ExtendedDefaultPolicy::DetermineMultiplier()
if (m_Intrinsic > 0)
{
// In most cases such intrinsics are lowered as single CPU instructions
multiplier += 1.0 + m_Intrinsic * 0.2;
multiplier += 1.0 + m_Intrinsic * 0.3;
JITDUMP("\nInline has %d intrinsics. Multiplier increased to %g.", m_Intrinsic, multiplier);
}

Expand All @@ -1541,7 +1532,7 @@ double ExtendedDefaultPolicy::DetermineMultiplier()
//
// int Caller(string s) => Callee(s); // String is 'exact' (sealed)
//
multiplier += 2.5;
multiplier += 2.0;
JITDUMP("\nCallsite passes %d arguments of exact classes while callee accepts non-exact ones. Multiplier "
"increased to %g.",
m_ArgIsExactClsSigIsNot, multiplier);
Expand All @@ -1561,15 +1552,15 @@ double ExtendedDefaultPolicy::DetermineMultiplier()
if (m_FoldableExpr > 0)
{
// E.g. add/mul/ceq, etc. over constant/constant arguments
multiplier += 1.0 + m_FoldableExpr;
multiplier += m_FoldableExpr;
JITDUMP("\nInline has %d foldable binary expressions. Multiplier increased to %g.", m_FoldableExpr,
multiplier);
}

if (m_FoldableExprUn > 0)
{
// E.g. casts, negations, etc. over constants/constant arguments
multiplier += m_FoldableExprUn;
multiplier += m_FoldableExprUn * 0.5;
JITDUMP("\nInline has %d foldable unary expressions. Multiplier increased to %g.", m_FoldableExprUn,
multiplier);
}
Expand Down Expand Up @@ -1645,21 +1636,22 @@ double ExtendedDefaultPolicy::DetermineMultiplier()

if (m_FoldableSwitch > 0)
{
multiplier += m_FoldableSwitch * 5.0;
JITDUMP("\nInline candidate has %d foldable switches. Multiplier increased to %g.", m_FoldableSwitch, multiplier);
multiplier += 6.0;
JITDUMP("\nInline candidate has %d foldable switches. Multiplier increased to %g.", m_FoldableSwitch,
multiplier);
}
else if (m_Switch > 0)
{
if (m_IsPrejitRoot)
{
// Assume the switches can be foldable in PrejitRoot mode.
multiplier += m_Switch * 5.0;
multiplier += 6.0;
JITDUMP("\nPrejit root candidate has %d switches. Multiplier increased to %g.", m_Switch, multiplier);
}
else
{
// TODO: Investigate cases where it makes sense to inline non-foldable switches
multiplier = 0;
multiplier = 0.0;
JITDUMP("\nInline candidate has %d switches. Multiplier limited to %g.", m_Switch, multiplier);
}
}
Expand All @@ -1685,17 +1677,16 @@ double ExtendedDefaultPolicy::DetermineMultiplier()
{
multiplier *= min(m_ProfileFrequency, 1.0) * profileScale;
}
JITDUMP("\nCallsite has profile data: %g.", m_ProfileFrequency);
JITDUMP("\nCallsite has profile data: %g. Multiplier limited to %g.", m_ProfileFrequency, multiplier);
}

if (JitConfig.JitExtDefaultPolicyMaxLclStep() > 0)
// Slow down if there are already too many locals
if (m_RootCompiler->lvaTableCnt > 50)
{
const double lclLimitStep = JitConfig.JitMaxLocalsToTrack() / (double)JitConfig.JitExtDefaultPolicyMaxLclStep();
if (m_RootCompiler->lvaTableCnt > lclLimitStep)
{
multiplier /= (m_RootCompiler->lvaTableCnt / lclLimitStep);
JITDUMP("\nCaller has %d locals. Multiplier decreased to %g.", m_RootCompiler->lvaTableCnt, multiplier);
}
// E.g. MaxLocalsToTrack = 1024 and lvaTableCnt = 512 -> multiplier *= 0.5;
const double lclFullness = min(1.0, (double)m_RootCompiler->lvaTableCnt / JitConfig.JitMaxLocalsToTrack());
multiplier *= (1.0 - lclFullness);
JITDUMP("\nCaller has %d locals. Multiplier decreased to %g.", m_RootCompiler->lvaTableCnt, multiplier);
}

if (m_BackwardJump)
Expand Down
7 changes: 3 additions & 4 deletions src/coreclr/jit/jitconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -460,9 +460,8 @@ CONFIG_STRING(JitInlineReplayFile, W("JitInlineReplayFile"))
// Extended version of DefaultPolicy that includes a more precise IL scan,
// relies on PGO if it exists and generally is more aggressive.
CONFIG_INTEGER(JitExtDefaultPolicy, W("JitExtDefaultPolicy"), 1)
CONFIG_INTEGER(JitExtDefaultPolicyMaxLclStep, W("JitExtDefaultPolicyMaxLclStep"), 0xA)
CONFIG_INTEGER(JitExtDefaultPolicyMaxIL, W("JitExtDefaultPolicyMaxIL"), 0x400)
CONFIG_INTEGER(JitExtDefaultPolicyMaxBB, W("JitExtDefaultPolicyMaxBB"), 0xA)
CONFIG_INTEGER(JitExtDefaultPolicyMaxIL, W("JitExtDefaultPolicyMaxIL"), 0x80) // 128
CONFIG_INTEGER(JitExtDefaultPolicyMaxILProf, W("JitExtDefaultPolicyMaxILProf"), 0x200) // 512

// Inliner uses the following formula for PGO-driven decisions:
//
Expand All @@ -473,7 +472,7 @@ CONFIG_INTEGER(JitExtDefaultPolicyMaxBB, W("JitExtDefaultPolicyMaxBB"), 0xA)
// (except the cases where inlining in cold blocks improves type info/escape analysis for the whole caller).
// For now, it's only applied for dynamic PGO.
CONFIG_INTEGER(JitExtDefaultPolicyProfTrust, W("JitExtDefaultPolicyProfTrust"), 0x7)
CONFIG_INTEGER(JitExtDefaultPolicyProfScale, W("JitExtDefaultPolicyProfScale"), 0x2A)
CONFIG_INTEGER(JitExtDefaultPolicyProfScale, W("JitExtDefaultPolicyProfScale"), 0x2E) // 0x2E = 46 -> 4.6 (NOTE(review): comment previously read "40 -> 4.0", which is 0x28 - confirm intended value)

CONFIG_INTEGER(JitInlinePolicyModel, W("JitInlinePolicyModel"), 0)
CONFIG_INTEGER(JitInlinePolicyProfile, W("JitInlinePolicyProfile"), 0)
Expand Down