Merged
Changes from 3 commits
3 changes: 3 additions & 0 deletions eng/pipelines/common/templates/runtimes/run-test-job.yml
@@ -554,7 +554,10 @@ jobs:
scenarios:
- jitosr
- jitosr_stress
- jitosr_pgo
- jitpartialcompilation
- jitpartialcompilation_osr
- jitpartialcompilation_osr_pgo
- jitobjectstackallocation
${{ if in(parameters.testGroup, 'ilasm') }}:
scenarios:
38 changes: 38 additions & 0 deletions src/coreclr/inc/pgo_formatprocessing.h
@@ -361,6 +361,44 @@ bool ReadInstrumentationSchemaWithLayout(const uint8_t *pByte, size_t cbDataMax,
});
}


// Return true if schemaTable entries are a subset of the schema described by pByte, with matching entries in the same order.
// Also updates the offsets of the matching entries in schemaTable to those of the pByte schema.
//
inline bool ComparePgoSchemaCompatible(const uint8_t *pByte, size_t cbDataMax, ICorJitInfo::PgoInstrumentationSchema* schemaTable, size_t cSchemas)
@jakobbotsch (Member), Nov 11, 2021: I was a little surprised that this mutates the schema. Maybe rename it to something like SetOffsetsForSchemaSubsequence or similar?

Member Author: How about CheckIfPgoSchemaIsCompatibleAndSetOffsets?

{
size_t nSchema = 0;
size_t nMatched = 0;
size_t nUnmatched = 0;
size_t initialOffset = cbDataMax;

auto handler = [schemaTable, &nSchema, &nMatched, &nUnmatched](const ICorJitInfo::PgoInstrumentationSchema& schema)
{
const size_t iSchemaAdj = nSchema - nUnmatched;
Member: Isn't this the same as nMatched?

Member: And thinking a little bit more about it, that means this might index off the end of schemaTable.

Member Author: It is, yeah.


if ((schema.InstrumentationKind != schemaTable[iSchemaAdj].InstrumentationKind)
|| (schema.ILOffset != schemaTable[iSchemaAdj].ILOffset)
|| (schema.Count != schemaTable[iSchemaAdj].Count)
|| (schema.Other != schemaTable[iSchemaAdj].Other))
{
nUnmatched++;
}
else
{
schemaTable[iSchemaAdj].Offset = schema.Offset;
nMatched++;
}

nSchema++;

return true;
};

ReadInstrumentationSchemaWithLayout(pByte, cbDataMax, initialOffset, handler);

return (nMatched == cSchemas);
}
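Editor's note: per the review thread above, iSchemaAdj always equals nMatched, and when the stored schema keeps matching after all cSchemas caller entries are consumed the indexing can run past the end of schemaTable. A bounds-checked drop-in for the handler lambda above might look like the following sketch (an illustration of the concern being discussed, not the code as merged; the nSchema/nUnmatched counters become unnecessary):

    auto handler = [schemaTable, cSchemas, &nMatched](const ICorJitInfo::PgoInstrumentationSchema& schema)
    {
        // Once every caller-supplied entry has been matched, stop indexing
        // into schemaTable and simply skip any remaining stored entries.
        if (nMatched >= cSchemas)
        {
            return true;
        }

        if ((schema.InstrumentationKind == schemaTable[nMatched].InstrumentationKind)
            && (schema.ILOffset == schemaTable[nMatched].ILOffset)
            && (schema.Count == schemaTable[nMatched].Count)
            && (schema.Other == schemaTable[nMatched].Other))
        {
            // Matching entry: record where its data lives in the existing allocation.
            schemaTable[nMatched].Offset = schema.Offset;
            nMatched++;
        }

        return true;
    };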

inline bool ReadInstrumentationSchemaWithLayoutIntoSArray(const uint8_t *pByte, size_t cbDataMax, size_t initialOffset, SArray<ICorJitInfo::PgoInstrumentationSchema>* pSchemas)
{
auto lambda = [pSchemas](const ICorJitInfo::PgoInstrumentationSchema &schema)
81 changes: 55 additions & 26 deletions src/coreclr/jit/fgprofile.cpp
@@ -300,10 +300,10 @@ class Instrumentor
virtual void BuildSchemaElements(BasicBlock* block, Schema& schema)
{
}
virtual void Instrument(BasicBlock* block, Schema& schema, BYTE* profileMemory)
virtual void Instrument(BasicBlock* block, Schema& schema, uint8_t* profileMemory)
{
}
virtual void InstrumentMethodEntry(Schema& schema, BYTE* profileMemory)
virtual void InstrumentMethodEntry(Schema& schema, uint8_t* profileMemory)
{
}
virtual void SuppressProbes()
@@ -349,8 +349,8 @@ class BlockCountInstrumentor : public Instrumentor
}
void Prepare(bool isPreImport) override;
void BuildSchemaElements(BasicBlock* block, Schema& schema) override;
void Instrument(BasicBlock* block, Schema& schema, BYTE* profileMemory) override;
void InstrumentMethodEntry(Schema& schema, BYTE* profileMemory) override;
void Instrument(BasicBlock* block, Schema& schema, uint8_t* profileMemory) override;
void InstrumentMethodEntry(Schema& schema, uint8_t* profileMemory) override;
};

//------------------------------------------------------------------------
@@ -428,7 +428,7 @@ void BlockCountInstrumentor::BuildSchemaElements(BasicBlock* block, Schema& sche
// schema -- instrumentation schema
// profileMemory -- profile data slab
//
void BlockCountInstrumentor::Instrument(BasicBlock* block, Schema& schema, BYTE* profileMemory)
void BlockCountInstrumentor::Instrument(BasicBlock* block, Schema& schema, uint8_t* profileMemory)
{
const ICorJitInfo::PgoInstrumentationSchema& entry = schema[block->bbCountSchemaIndex];

@@ -464,7 +464,7 @@ void BlockCountInstrumentor::Instrument(BasicBlock* block, Schema& schema, BYTE*
// Notes:
// When prejitting, add the method entry callback node
//
void BlockCountInstrumentor::InstrumentMethodEntry(Schema& schema, BYTE* profileMemory)
void BlockCountInstrumentor::InstrumentMethodEntry(Schema& schema, uint8_t* profileMemory)
{
Compiler::Options& opts = m_comp->opts;
Compiler::Info& info = m_comp->info;
@@ -982,7 +982,7 @@ class EfficientEdgeCountInstrumentor : public Instrumentor, public SpanningTreeV
return ((block->bbFlags & BBF_IMPORTED) == BBF_IMPORTED);
}
void BuildSchemaElements(BasicBlock* block, Schema& schema) override;
void Instrument(BasicBlock* block, Schema& schema, BYTE* profileMemory) override;
void Instrument(BasicBlock* block, Schema& schema, uint8_t* profileMemory) override;

void Badcode() override
{
@@ -1132,7 +1132,7 @@ void EfficientEdgeCountInstrumentor::BuildSchemaElements(BasicBlock* block, Sche
// schema -- instrumentation schema
// profileMemory -- profile data slab
//
void EfficientEdgeCountInstrumentor::Instrument(BasicBlock* block, Schema& schema, BYTE* profileMemory)
void EfficientEdgeCountInstrumentor::Instrument(BasicBlock* block, Schema& schema, uint8_t* profileMemory)
{
// Inlinee compilers build their blocks in the root compiler's
// graph. So for NumSucc, we use the root compiler instance.
@@ -1307,12 +1307,12 @@ class BuildClassProbeSchemaGen
class ClassProbeInserter
{
Schema& m_schema;
BYTE* m_profileMemory;
uint8_t* m_profileMemory;
int* m_currentSchemaIndex;
unsigned& m_instrCount;

public:
ClassProbeInserter(Schema& schema, BYTE* profileMemory, int* pCurrentSchemaIndex, unsigned& instrCount)
ClassProbeInserter(Schema& schema, uint8_t* profileMemory, int* pCurrentSchemaIndex, unsigned& instrCount)
: m_schema(schema)
, m_profileMemory(profileMemory)
, m_currentSchemaIndex(pCurrentSchemaIndex)
@@ -1349,7 +1349,7 @@ class ClassProbeInserter

// Figure out where the table is located.
//
BYTE* classProfile = m_schema[*m_currentSchemaIndex].Offset + m_profileMemory;
uint8_t* classProfile = m_schema[*m_currentSchemaIndex].Offset + m_profileMemory;
*m_currentSchemaIndex += 2; // There are 2 schema entries per class probe

// Grab a temp to hold the 'this' object as it will be used three times
@@ -1426,7 +1426,7 @@ class ClassProbeInstrumentor : public Instrumentor
}
void Prepare(bool isPreImport) override;
void BuildSchemaElements(BasicBlock* block, Schema& schema) override;
void Instrument(BasicBlock* block, Schema& schema, BYTE* profileMemory) override;
void Instrument(BasicBlock* block, Schema& schema, uint8_t* profileMemory) override;
void SuppressProbes() override;
};

@@ -1490,7 +1490,7 @@ void ClassProbeInstrumentor::BuildSchemaElements(BasicBlock* block, Schema& sche
// schema -- instrumentation schema
// profileMemory -- profile data slab
//
void ClassProbeInstrumentor::Instrument(BasicBlock* block, Schema& schema, BYTE* profileMemory)
void ClassProbeInstrumentor::Instrument(BasicBlock* block, Schema& schema, uint8_t* profileMemory)
{
if ((block->bbFlags & BBF_HAS_CLASS_PROFILE) == 0)
{
@@ -1563,21 +1563,43 @@ PhaseStatus Compiler::fgPrepareToInstrumentMethod()
// Choose instrumentation technology.
//
// We enable edge profiling by default, except when:
//
// * disabled by option
// * we are prejitting
// * we are jitting osr methods
// * we are jitting tier0 methods with patchpoints
// * we are jitting an OSR method
//
// Currently, OSR is incompatible with edge profiling. So if OSR is enabled,
// always do block profiling.
// OSR is incompatible with edge profiling. Only portions of the Tier0
// method will be executed, and the bail-outs at patchpoints won't be obvious
// exit points from the method. So for OSR we always do block profiling.
//
// Note this incompatibility only exists for methods that actually have
// patchpoints, but we won't know that until we import.
// patchpoints. Currently we will only place patchpoints in methods with
// backwards jumps.
//
// And because we want the Tier1 method to see the full set of profile data,
// when OSR is enabled, both Tier0 and any OSR methods need to contribute to
// the same profile data set. Since Tier0 has laid down a dense block-based
// schema, the OSR methods must use this schema as well.
//
// Note that OSR methods may also inline. We currently won't instrument
// any inlinee contributions (which would also need to carefully "share"
// the profile data segment with any Tier0 version and/or any other equivalent
// inlinee), so we'll lose a bit of their profile data. We can support this
// eventually if it turns out to matter.
//
// Similar issues arise with partially jitted methods. Because we currently
// only defer jitting for throw blocks, we currently ignore the impact of partial
// jitting on PGO. If we ever implement a broader pattern of deferral -- say deferring
// based on static PGO -- we will need to reconsider.
//
CLANG_FORMAT_COMMENT_ANCHOR;

const bool prejit = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT);
const bool osr = (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0) && (JitConfig.TC_OnStackReplacement() > 0));
const bool useEdgeProfiles = (JitConfig.JitEdgeProfiling() > 0) && !prejit && !osr;
const bool prejit = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT);
const bool tier0WithPatchpoints = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0) &&
(JitConfig.TC_OnStackReplacement() > 0) && compHasBackwardJump;
const bool osrMethod = opts.IsOSR();
const bool useEdgeProfiles = (JitConfig.JitEdgeProfiling() > 0) && !prejit && !tier0WithPatchpoints && !osrMethod;

if (useEdgeProfiles)
{
@@ -1586,7 +1608,9 @@ PhaseStatus Compiler::fgPrepareToInstrumentMethod()
else
{
JITDUMP("Using block profiling, because %s\n",
(JitConfig.JitEdgeProfiling() > 0) ? "edge profiles disabled" : prejit ? "prejitting" : "OSR");
(JitConfig.JitEdgeProfiling() > 0)
? "edge profiles disabled"
: prejit ? "prejitting" : osrMethod ? "OSR" : "tier0 with patchpoints");

fgCountInstrumentor = new (this, CMK_Pgo) BlockCountInstrumentor(this);
}
@@ -1636,7 +1660,7 @@ PhaseStatus Compiler::fgInstrumentMethod()
{
noway_assert(!compIsForInlining());

// Make post-importpreparations.
// Make post-import preparations.
//
const bool isPreImport = false;
fgCountInstrumentor->Prepare(isPreImport);
@@ -1694,11 +1718,16 @@ PhaseStatus Compiler::fgInstrumentMethod()

assert(schema.size() > 0);

// Allocate the profile buffer
// Allocate/retrieve the profile buffer.
//
BYTE* profileMemory;

HRESULT res = info.compCompHnd->allocPgoInstrumentationBySchema(info.compMethodHnd, schema.data(),
// If this is an OSR method, we should use the same buffer that the Tier0 method used.
//
// This is supported by allocPgoInstrumentationBySchema, which will verify the schema
// we provide here matches the one from Tier0, and will fill in the data offsets in
// our schema properly.
//
uint8_t* profileMemory;
HRESULT res = info.compCompHnd->allocPgoInstrumentationBySchema(info.compMethodHnd, schema.data(),
(UINT32)schema.size(), &profileMemory);

// Deal with allocation failures.
14 changes: 11 additions & 3 deletions src/coreclr/jit/importer.cpp
@@ -21244,10 +21244,19 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call,
//
assert(call->IsVirtual());

// Possibly instrument, if not optimizing.
// Possibly instrument. Note for OSR+PGO we will instrument when
// optimizing and (currently) won't devirtualize. We may want
// to revisit -- if we can devirtualize we should be able to
// suppress the probe.
//
if (opts.OptimizationDisabled())
// We strip BBINSTR from inlinees currently, so we'll only
// do this for the root method calls.
//
if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR))
{
assert(opts.OptimizationDisabled() || opts.IsOSR());
assert(!compIsForInlining());

// During importation, optionally flag this block as one that
// contains calls requiring class profiling. Ideally perhaps
// we'd just keep track of the calls themselves, so we don't
@@ -21277,7 +21286,6 @@ void Compiler::impDevirtualizeCall(GenTreeCall* call,
//
compCurBB->bbFlags |= BBF_HAS_CLASS_PROFILE;
}

return;
}

3 changes: 2 additions & 1 deletion src/coreclr/vm/jitinterface.cpp
@@ -12532,14 +12532,15 @@ CORJIT_FLAGS GetCompileFlags(MethodDesc * ftn, CORJIT_FLAGS flags, CORINFO_METHO
//
// * We're writing pgo data and we're jitting at Tier0.
// * Tiered PGO is enabled and we're jitting at Tier0.
// * Tiered PGO is enabled and we are jitting an OSR method.
//
if ((CLRConfig::GetConfigValue(CLRConfig::INTERNAL_WritePGOData) > 0)
&& flags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_TIER0))
{
flags.Set(CORJIT_FLAGS::CORJIT_FLAG_BBINSTR);
}
else if ((CLRConfig::GetConfigValue(CLRConfig::INTERNAL_TieredPGO) > 0)
&& flags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_TIER0))
&& (flags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_TIER0) || flags.IsSet(CORJIT_FLAGS::CORJIT_FLAG_OSR)))
{
flags.Set(CORJIT_FLAGS::CORJIT_FLAG_BBINSTR);
}
4 changes: 2 additions & 2 deletions src/coreclr/vm/pgo.cpp
@@ -653,7 +653,7 @@ HRESULT PgoManager::allocPgoInstrumentationBySchemaInstance(MethodDesc* pMD,
HeaderList *currentHeaderList = m_pgoHeaders;
if (currentHeaderList != NULL)
{
if (!ComparePgoSchemaEquals(currentHeaderList->header.GetData(), currentHeaderList->header.countsOffset, pSchema, countSchemaItems))
if (!ComparePgoSchemaCompatible(currentHeaderList->header.GetData(), currentHeaderList->header.countsOffset, pSchema, countSchemaItems))
{
return E_NOTIMPL;
}
@@ -683,7 +683,7 @@ HRESULT PgoManager::allocPgoInstrumentationBySchemaInstance(MethodDesc* pMD,
HeaderList* existingData = laPgoManagerThis->m_pgoDataLookup.Lookup(pMD);
if (existingData != NULL)
{
if (!ComparePgoSchemaEquals(existingData->header.GetData(), existingData->header.countsOffset, pSchema, countSchemaItems))
if (!ComparePgoSchemaCompatible(existingData->header.GetData(), existingData->header.countsOffset, pSchema, countSchemaItems))
{
return E_NOTIMPL;
}
4 changes: 4 additions & 0 deletions src/tests/Common/testenvironment.proj
@@ -187,7 +187,11 @@
<TestEnvironment Include="gcstress0xc_jitminopts_heapverify1" GCStress="0xC" JITMinOpts="1" HeapVerify="1" />
<TestEnvironment Include="jitosr" TC_OnStackReplacement="1" TC_QuickJitForLoops="1" TieredCompilation="1" />
<TestEnvironment Include="jitosr_stress" TC_OnStackReplacement="1" TC_QuickJitForLoops="1" TC_OnStackReplacement_InitialCounter="1" OSR_HitLimit="1" TieredCompilation="1" />
<TestEnvironment Include="jitosr_pgo" TC_OnStackReplacement="1" TC_QuickJitForLoops="1" TieredCompilation="1" TieredPGO="1" />
<TestEnvironment Include="jitpartialcompilation" TC_PartialCompilation="1" TC_QuickJitForLoops="1" TieredCompilation="1" />
<TestEnvironment Include="jitpartialcompilation_pgo" TC_PartialCompilation="1" TC_QuickJitForLoops="1" TieredCompilation="1" TieredPGO="1" />
<TestEnvironment Include="jitpartialcompilation_osr" TC_OnStackReplacement="1" TC_PartialCompilation="1" TC_QuickJitForLoops="1" TieredCompilation="1" />
<TestEnvironment Include="jitpartialcompilation_osr_pgo" TC_OnStackReplacement="1" TC_PartialCompilation="1" TC_QuickJitForLoops="1" TieredCompilation="1" TieredPGO="1" />
<TestEnvironment Include="jitobjectstackallocation" JitObjectStackAllocation="1" TieredCompilation="0" />
<TestEnvironment Include="ilasmroundtrip" RunningIlasmRoundTrip="1" />
<TestEnvironment Include="clrinterpreter" TieredCompilation="1" />
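Editor's note: for local experimentation, the new jitosr_pgo scenario corresponds, assuming the usual mapping of these TestEnvironment attributes onto COMPlus_ environment variables performed by testenvironment.proj, to roughly the following settings (an illustrative sketch, not an excerpt from the repo):

    COMPlus_TieredCompilation=1
    COMPlus_TC_OnStackReplacement=1
    COMPlus_TC_QuickJitForLoops=1
    COMPlus_TieredPGO=1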