@@ -300,10 +300,10 @@ class Instrumentor
300300 virtual void BuildSchemaElements (BasicBlock* block, Schema& schema)
301301 {
302302 }
303- virtual void Instrument (BasicBlock* block, Schema& schema, BYTE * profileMemory)
303+ virtual void Instrument (BasicBlock* block, Schema& schema, uint8_t * profileMemory)
304304 {
305305 }
306- virtual void InstrumentMethodEntry (Schema& schema, BYTE * profileMemory)
306+ virtual void InstrumentMethodEntry (Schema& schema, uint8_t * profileMemory)
307307 {
308308 }
309309 virtual void SuppressProbes ()
@@ -349,8 +349,8 @@ class BlockCountInstrumentor : public Instrumentor
349349 }
350350 void Prepare (bool isPreImport) override ;
351351 void BuildSchemaElements (BasicBlock* block, Schema& schema) override ;
352- void Instrument (BasicBlock* block, Schema& schema, BYTE * profileMemory) override ;
353- void InstrumentMethodEntry (Schema& schema, BYTE * profileMemory) override ;
352+ void Instrument (BasicBlock* block, Schema& schema, uint8_t * profileMemory) override ;
353+ void InstrumentMethodEntry (Schema& schema, uint8_t * profileMemory) override ;
354354};
355355
356356// ------------------------------------------------------------------------
@@ -428,7 +428,7 @@ void BlockCountInstrumentor::BuildSchemaElements(BasicBlock* block, Schema& sche
428428// schema -- instrumentation schema
429429// profileMemory -- profile data slab
430430//
431- void BlockCountInstrumentor::Instrument (BasicBlock* block, Schema& schema, BYTE * profileMemory)
431+ void BlockCountInstrumentor::Instrument (BasicBlock* block, Schema& schema, uint8_t * profileMemory)
432432{
433433 const ICorJitInfo::PgoInstrumentationSchema& entry = schema[block->bbCountSchemaIndex ];
434434
@@ -464,7 +464,7 @@ void BlockCountInstrumentor::Instrument(BasicBlock* block, Schema& schema, BYTE*
464464// Notes:
465465// When prejitting, add the method entry callback node
466466//
467- void BlockCountInstrumentor::InstrumentMethodEntry (Schema& schema, BYTE * profileMemory)
467+ void BlockCountInstrumentor::InstrumentMethodEntry (Schema& schema, uint8_t * profileMemory)
468468{
469469 Compiler::Options& opts = m_comp->opts ;
470470 Compiler::Info& info = m_comp->info ;
@@ -1002,7 +1002,7 @@ class EfficientEdgeCountInstrumentor : public Instrumentor, public SpanningTreeV
10021002 return ((block->bbFlags & BBF_IMPORTED) == BBF_IMPORTED);
10031003 }
10041004 void BuildSchemaElements (BasicBlock* block, Schema& schema) override ;
1005- void Instrument (BasicBlock* block, Schema& schema, BYTE * profileMemory) override ;
1005+ void Instrument (BasicBlock* block, Schema& schema, uint8_t * profileMemory) override ;
10061006
10071007 void Badcode () override
10081008 {
@@ -1136,7 +1136,7 @@ void EfficientEdgeCountInstrumentor::BuildSchemaElements(BasicBlock* block, Sche
11361136// schema -- instrumentation schema
11371137// profileMemory -- profile data slab
11381138//
1139- void EfficientEdgeCountInstrumentor::Instrument (BasicBlock* block, Schema& schema, BYTE * profileMemory)
1139+ void EfficientEdgeCountInstrumentor::Instrument (BasicBlock* block, Schema& schema, uint8_t * profileMemory)
11401140{
11411141 // Inlinee compilers build their blocks in the root compiler's
11421142 // graph. So for NumSucc, we use the root compiler instance.
@@ -1311,12 +1311,12 @@ class BuildClassProbeSchemaGen
13111311class ClassProbeInserter
13121312{
13131313 Schema& m_schema;
1314- BYTE* m_profileMemory;
1314+ uint8_t * m_profileMemory;
13151315 int * m_currentSchemaIndex;
13161316 unsigned & m_instrCount;
13171317
13181318public:
1319- ClassProbeInserter (Schema& schema, BYTE * profileMemory, int * pCurrentSchemaIndex, unsigned & instrCount)
1319+ ClassProbeInserter (Schema& schema, uint8_t * profileMemory, int * pCurrentSchemaIndex, unsigned & instrCount)
13201320 : m_schema(schema)
13211321 , m_profileMemory(profileMemory)
13221322 , m_currentSchemaIndex(pCurrentSchemaIndex)
@@ -1353,7 +1353,7 @@ class ClassProbeInserter
13531353
13541354 // Figure out where the table is located.
13551355 //
1356- BYTE * classProfile = m_schema[*m_currentSchemaIndex].Offset + m_profileMemory;
1356+ uint8_t * classProfile = m_schema[*m_currentSchemaIndex].Offset + m_profileMemory;
13571357 *m_currentSchemaIndex += 2 ; // There are 2 schema entries per class probe
13581358
13591359 // Grab a temp to hold the 'this' object as it will be used three times
@@ -1430,7 +1430,7 @@ class ClassProbeInstrumentor : public Instrumentor
14301430 }
14311431 void Prepare (bool isPreImport) override ;
14321432 void BuildSchemaElements (BasicBlock* block, Schema& schema) override ;
1433- void Instrument (BasicBlock* block, Schema& schema, BYTE * profileMemory) override ;
1433+ void Instrument (BasicBlock* block, Schema& schema, uint8_t * profileMemory) override ;
14341434 void SuppressProbes () override ;
14351435};
14361436
@@ -1494,7 +1494,7 @@ void ClassProbeInstrumentor::BuildSchemaElements(BasicBlock* block, Schema& sche
14941494// schema -- instrumentation schema
14951495// profileMemory -- profile data slab
14961496//
1497- void ClassProbeInstrumentor::Instrument (BasicBlock* block, Schema& schema, BYTE * profileMemory)
1497+ void ClassProbeInstrumentor::Instrument (BasicBlock* block, Schema& schema, uint8_t * profileMemory)
14981498{
14991499 if ((block->bbFlags & BBF_HAS_CLASS_PROFILE) == 0 )
15001500 {
@@ -1567,21 +1567,43 @@ PhaseStatus Compiler::fgPrepareToInstrumentMethod()
15671567 // Choose instrumentation technology.
15681568 //
15691569 // We enable edge profiling by default, except when:
1570+ //
15701571 // * disabled by option
15711572 // * we are prejitting
1572- // * we are jitting osr methods
1573+ // * we are jitting tier0 methods with patchpoints
1574+ // * we are jitting an OSR method
15731575 //
1574- // Currently, OSR is incompatible with edge profiling. So if OSR is enabled,
1575- // always do block profiling.
1576+ // OSR is incompatible with edge profiling. Only portions of the Tier0
1577+ // method will be executed, and the bail-outs at patchpoints won't be obvious
1578+ // exit points from the method. So for OSR we always do block profiling.
15761579 //
15771580 // Note this incompatibility only exists for methods that actually have
1578- // patchpoints, but we won't know that until we import.
1581+ // patchpoints. Currently we will only place patchpoints in methods with
1582+ // backwards jumps.
1583+ //
1584+ // And because we want the Tier1 method to see the full set of profile data,
1585+ // when OSR is enabled, both Tier0 and any OSR methods need to contribute to
1586+ // the same profile data set. Since Tier0 has laid down a dense block-based
1587+ // schema, the OSR methods must use this schema as well.
1588+ //
1589+ // Note that OSR methods may also inline. We currently won't instrument
1590+ // any inlinee contributions (which would also need to carefully "share"
1591+ // the profile data segment with any Tier0 version and/or any other equivalent
1592+ // inlinee), so we'll lose a bit of their profile data. We can support this
1593+ // eventually if it turns out to matter.
1594+ //
1595+ // Similar issues arise with partially jitted methods. Because we currently
1596+ // only defer jitting for throw blocks, we currently ignore the impact of partial
1597+ // jitting on PGO. If we ever implement a broader pattern of deferral -- say deferring
1598+ // based on static PGO -- we will need to reconsider.
15791599 //
15801600 CLANG_FORMAT_COMMENT_ANCHOR;
15811601
1582- const bool prejit = opts.jitFlags ->IsSet (JitFlags::JIT_FLAG_PREJIT);
1583- const bool osr = (opts.jitFlags ->IsSet (JitFlags::JIT_FLAG_TIER0) && (JitConfig.TC_OnStackReplacement () > 0 ));
1584- const bool useEdgeProfiles = (JitConfig.JitEdgeProfiling () > 0 ) && !prejit && !osr;
1602+ const bool prejit = opts.jitFlags ->IsSet (JitFlags::JIT_FLAG_PREJIT);
1603+ const bool tier0WithPatchpoints = opts.jitFlags ->IsSet (JitFlags::JIT_FLAG_TIER0) &&
1604+ (JitConfig.TC_OnStackReplacement () > 0 ) && compHasBackwardJump;
1605+ const bool osrMethod = opts.IsOSR ();
1606+ const bool useEdgeProfiles = (JitConfig.JitEdgeProfiling () > 0 ) && !prejit && !tier0WithPatchpoints && !osrMethod;
15851607
15861608 if (useEdgeProfiles)
15871609 {
@@ -1590,7 +1612,9 @@ PhaseStatus Compiler::fgPrepareToInstrumentMethod()
15901612 else
15911613 {
15921614 JITDUMP (" Using block profiling, because %s\n " ,
1593- (JitConfig.JitEdgeProfiling () > 0 ) ? " edge profiles disabled" : prejit ? " prejitting" : " OSR" );
1615+ (JitConfig.JitEdgeProfiling () > 0 )
1616+ ? " edge profiles disabled"
1617+ : prejit ? " prejitting" : osrMethod ? " OSR" : " tier0 with patchpoints" );
15941618
15951619 fgCountInstrumentor = new (this , CMK_Pgo) BlockCountInstrumentor (this );
15961620 }
@@ -1640,7 +1664,7 @@ PhaseStatus Compiler::fgInstrumentMethod()
16401664{
16411665 noway_assert (!compIsForInlining ());
16421666
1643- // Make post-importpreparations .
1667+ // Make post-import preparations .
16441668 //
16451669 const bool isPreImport = false ;
16461670 fgCountInstrumentor->Prepare (isPreImport);
@@ -1665,7 +1689,17 @@ PhaseStatus Compiler::fgInstrumentMethod()
16651689 // Verify we created schema for the calls needing class probes.
16661690 // (we counted those when importing)
16671691 //
1668- assert (fgClassInstrumentor->SchemaCount () == info.compClassProbeCount );
1692+ // This is not true when we do partial compilation; it can/will erase class probes,
1693+ // and there's no easy way to figure out how many should be left.
1694+ //
1695+ if (doesMethodHavePartialCompilationPatchpoints ())
1696+ {
1697+ assert (fgClassInstrumentor->SchemaCount () <= info.compClassProbeCount );
1698+ }
1699+ else
1700+ {
1701+ assert (fgClassInstrumentor->SchemaCount () == info.compClassProbeCount );
1702+ }
16691703
16701704 // Optionally, when jitting, if there were no class probes and only one count probe,
16711705 // suppress instrumentation.
@@ -1698,11 +1732,16 @@ PhaseStatus Compiler::fgInstrumentMethod()
16981732
16991733 assert (schema.size () > 0 );
17001734
1701- // Allocate the profile buffer
1735+ // Allocate/retrieve the profile buffer.
17021736 //
1703- BYTE* profileMemory;
1704-
1705- HRESULT res = info.compCompHnd ->allocPgoInstrumentationBySchema (info.compMethodHnd , schema.data (),
1737+ // If this is an OSR method, we should use the same buffer that the Tier0 method used.
1738+ //
1739+ // This is supported by allocPgoInstrumentationBySchema, which will verify the schema
1740+ // we provide here matches the one from Tier0, and will fill in the data offsets in
1741+ // our schema properly.
1742+ //
1743+ uint8_t * profileMemory;
1744+ HRESULT res = info.compCompHnd ->allocPgoInstrumentationBySchema (info.compMethodHnd , schema.data (),
17061745 (UINT32)schema.size (), &profileMemory);
17071746
17081747 // Deal with allocation failures.
@@ -1924,6 +1963,14 @@ void Compiler::fgIncorporateBlockCounts()
19241963 fgSetProfileWeight (block, profileWeight);
19251964 }
19261965 }
1966+
1967+ // For OSR, give the method entry (which will be a scratch BB)
1968+ // the same weight as the OSR Entry.
1969+ //
1970+ if (opts.IsOSR ())
1971+ {
1972+ fgFirstBB->inheritWeight (fgOSREntryBB);
1973+ }
19271974}
19281975
19291976// ------------------------------------------------------------------------
@@ -3277,11 +3324,17 @@ void Compiler::fgComputeCalledCount(weight_t returnWeight)
32773324
32783325 BasicBlock* firstILBlock = fgFirstBB; // The first block for IL code (i.e. for the IL code at offset 0)
32793326
3280- // Skip past any/all BBF_INTERNAL blocks that may have been added before the first real IL block.
3327+ // OSR methods can have complex entry flow, and so
3328+ // for OSR we ensure fgFirstBB has plausible profile data.
32813329 //
3282- while (firstILBlock-> bbFlags & BBF_INTERNAL )
3330+ if (!opts. IsOSR () )
32833331 {
3284- firstILBlock = firstILBlock->bbNext ;
3332+ // Skip past any/all BBF_INTERNAL blocks that may have been added before the first real IL block.
3333+ //
3334+ while (firstILBlock->bbFlags & BBF_INTERNAL)
3335+ {
3336+ firstILBlock = firstILBlock->bbNext ;
3337+ }
32853338 }
32863339
32873340 // The 'firstILBlock' is now expected to have a profile-derived weight
0 commit comments