From b403951d9508110cc77a52d2159bd5593cab9855 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 17 Nov 2021 15:20:22 +0100 Subject: [PATCH 1/6] Add support for dumping and using precise debug info * In the JIT, add support for dumping the precise debug info out through an environment variable `DOTNET_JitDumpPreciseDebugInfoFile` in a simple JSON format. This is a stopgap until we expose the extra information through ETW events. * In dotnet-pgo, add an argument --precise-debug-info-file which can point to the file produced by the JIT. When used, dotnet-pgo will get native<->IL mappings from this file instead of through ETW events. * In dotnet-pgo, add support for attributing samples to inlinees when that information is present. This changes the attribution process a bit: previously, we would group all LBR data/samples and then construct the profile from all the data. We now do it in a more streaming way where there is a SampleCorrelator that can handle individual LBR records and individual samples. * In dotnet-pgo, add an argument --dump-worst-overlap-graphs-to which can be used in the compare-mibc command to dump out a .dot file containing the flow graph of the methods with the worst overlap measures, and showing the relative weight count on each basic block and edge for the two profiles being compared. This is particularly useful to find out where we are producing incorrect debug mappings, by comparing spgo.mibc and instrumented.mibc files. 
--- src/coreclr/jit/codegen.h | 7 +- src/coreclr/jit/codegencommon.cpp | 104 +++++ src/coreclr/jit/codegenlinear.cpp | 6 + src/coreclr/jit/compiler.h | 10 + src/coreclr/jit/inline.h | 13 +- src/coreclr/jit/jitconfigvalues.h | 2 + .../tools/Common/TypeSystem/IL/FlowGraph.cs | 41 -- .../tools/dotnet-pgo/CommandLineOptions.cs | 22 +- .../tools/dotnet-pgo/MethodMemoryMap.cs | 85 +++- .../dotnet-pgo/PgoCompareMethodFlowGraph.cs | 292 ++++++++++++++ src/coreclr/tools/dotnet-pgo/Program.cs | 374 +++++------------- .../tools/dotnet-pgo/SPGO/FlowGraphHelper.cs | 85 ++++ .../tools/dotnet-pgo/SPGO/KeyValueMap.cs | 76 ++++ .../tools/dotnet-pgo/SPGO/NativeToILMap.cs | 80 ---- .../tools/dotnet-pgo/SPGO/PreciseDebugInfo.cs | 27 ++ .../tools/dotnet-pgo/SPGO/SampleCorrelator.cs | 278 +++++++++++++ .../tools/dotnet-pgo/SPGO/SampleProfile.cs | 73 ++-- 17 files changed, 1126 insertions(+), 449 deletions(-) create mode 100644 src/coreclr/tools/dotnet-pgo/PgoCompareMethodFlowGraph.cs create mode 100644 src/coreclr/tools/dotnet-pgo/SPGO/FlowGraphHelper.cs create mode 100644 src/coreclr/tools/dotnet-pgo/SPGO/KeyValueMap.cs delete mode 100644 src/coreclr/tools/dotnet-pgo/SPGO/NativeToILMap.cs create mode 100644 src/coreclr/tools/dotnet-pgo/SPGO/PreciseDebugInfo.cs create mode 100644 src/coreclr/tools/dotnet-pgo/SPGO/SampleCorrelator.cs diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 74420b2883260b..44bbd93a3c46f7 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -556,11 +556,16 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX void genIPmappingListDisp(); #endif // DEBUG - IPmappingDsc* genCreateIPMapping(IPmappingDscKind kind, const DebugInfo& di, bool isLabel); void genIPmappingAdd(IPmappingDscKind kind, const DebugInfo& di, bool isLabel); void genIPmappingAddToFront(IPmappingDscKind kind, const DebugInfo& di, bool isLabel); void genIPmappingGen(); +#ifdef DEBUG + void genDumpPreciseDebugInfo(); + void 
genDumpPreciseDebugInfoInlineTree(FILE* file, InlineContext* context, bool* first); + void genAddPreciseIPMappingHere(const DebugInfo& di); +#endif + void genEnsureCodeEmitted(const DebugInfo& di); //------------------------------------------------------------------------- diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index a33b4b94a5994c..fecac126540c11 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -127,6 +127,9 @@ CodeGen::CodeGen(Compiler* theCompiler) : CodeGenInterface(theCompiler) compiler->genIPmappingLast = nullptr; compiler->genCallSite2DebugInfoMap = nullptr; + compiler->genPreciseIPMappingsHead = nullptr; + compiler->genPreciseIPMappingsTail = nullptr; + /* Assume that we not fully interruptible */ SetInterruptible(false); @@ -2446,6 +2449,8 @@ void CodeGen::genEmitUnwindDebugGCandEH() genIPmappingGen(); + INDEBUG(genDumpPreciseDebugInfo()); + /* Finalize the Local Var info in terms of generated code */ genSetScopeInfo(); @@ -10801,6 +10806,105 @@ void CodeGen::genIPmappingGen() compiler->eeSetLIdone(); } +#ifdef DEBUG +void CodeGen::genDumpPreciseDebugInfoInlineTree(FILE* file, InlineContext* context, bool* first) +{ + if (context->GetSibling() != nullptr) + { + genDumpPreciseDebugInfoInlineTree(file, context->GetSibling(), first); + } + + if (context->IsSuccess()) + { + if (!*first) + { + fprintf(file, ","); + } + + *first = false; + + fprintf(file, "{\"Ordinal\":%u,", context->GetOrdinal()); + fprintf(file, "\"MethodID\":%lld,", (INT64)context->GetCallee()); + const char* className; + const char* methodName = compiler->eeGetMethodName(context->GetCallee(), &className); + fprintf(file, "\"MethodName\":\"%s\",", methodName); + fprintf(file, "\"Inlinees\":["); + if (context->GetChild() != nullptr) + { + bool childFirst = true; + genDumpPreciseDebugInfoInlineTree(file, context->GetChild(), &childFirst); + } + fprintf(file, "]}"); + } +} + +void 
CodeGen::genDumpPreciseDebugInfo() +{ + if (JitConfig.JitDumpPreciseDebugInfoFile() == nullptr) + return; + + FILE* file = _wfopen(JitConfig.JitDumpPreciseDebugInfoFile(), W("a")); + if (file == nullptr) + return; // The spin lock below is not taken yet, so failing to open the file cannot leave it held forever. + + static unsigned int s_flag; // Spin lock shared by every caller: 0 = free, 1 = held. + + while (InterlockedCompareExchange(&s_flag, 1, 0) != 0) + System_YieldProcessor(); + + // MethodID in ETW events are the method handles. + fprintf(file, "{\"MethodID\":%lld,", (INT64)compiler->info.compMethodHnd); + // Print inline tree. + fprintf(file, "\"InlineTree\":"); + + bool first = true; + genDumpPreciseDebugInfoInlineTree(file, compiler->compInlineContext, &first); + fprintf(file, ",\"Mappings\":["); + first = true; + for (PreciseIPMapping* mapping = compiler->genPreciseIPMappingsHead; mapping != nullptr; mapping = mapping->next) + { + if (!first) + { + fprintf(file, ","); + } + + first = false; + + fprintf( + file, + "{\"NativeOffset\":%u,\"InlineContext\":%u,\"ILOffset\":%u}", + mapping->nativeLoc.CodeOffset(GetEmitter()), + mapping->debugInfo.GetInlineContext()->GetOrdinal(), + mapping->debugInfo.GetLocation().GetOffset()); + } + + fprintf(file, "]}\n"); + + fclose(file); + + InterlockedCompareExchange(&s_flag, 0, 1); // Release only after fclose, so this method's whole record is written under the lock. +} + +void CodeGen::genAddPreciseIPMappingHere(const DebugInfo& di) +{ + PreciseIPMapping* mapping = new (compiler, CMK_DebugInfo) PreciseIPMapping; + mapping->next = nullptr; + mapping->nativeLoc.CaptureLocation(GetEmitter()); + mapping->debugInfo = di; + + if (compiler->genPreciseIPMappingsTail != nullptr) + { + compiler->genPreciseIPMappingsTail->next = mapping; + } + else + { + compiler->genPreciseIPMappingsHead = mapping; + } + + compiler->genPreciseIPMappingsTail = mapping; +} +#endif + /*============================================================================ * * These are empty stubs to help the late dis-assembler to compile diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index cec348f86ee44a..0f8ddcab46515f 100644 --- 
a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -440,7 +440,13 @@ void CodeGen::genCodeForBBlist() genIPmappingAdd(IPmappingDscKind::Normal, currentDI, firstMapping); firstMapping = false; } + #ifdef DEBUG + if ((JitConfig.JitDumpPreciseDebugInfoFile() != nullptr) && ilOffset->gtStmtDI.IsValid()) + { + genAddPreciseIPMappingHere(ilOffset->gtStmtDI); + } + assert(ilOffset->gtStmtLastILoffs <= compiler->info.compILCodeSize || ilOffset->gtStmtLastILoffs == BAD_IL_OFFSET); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 4f1895918545c8..3055b4beb8de62 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2535,6 +2535,13 @@ struct IPmappingDsc bool ipmdIsLabel; // Can this code be a branch label? }; +struct PreciseIPMapping +{ + PreciseIPMapping* next; + emitLocation nativeLoc; + DebugInfo debugInfo; +}; + /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX @@ -8305,6 +8312,9 @@ class Compiler IPmappingDsc* genIPmappingList; IPmappingDsc* genIPmappingLast; + PreciseIPMapping* genPreciseIPMappingsHead; + PreciseIPMapping* genPreciseIPMappingsTail; + // Managed RetVal - A side hash table meant to record the mapping from a // GT_CALL node to its debug info. This info is used to emit sequence points // that can be used by debugger to determine the native offset at which the diff --git a/src/coreclr/jit/inline.h b/src/coreclr/jit/inline.h index ee3fb1f3035f40..983f9cf1686f4e 100644 --- a/src/coreclr/jit/inline.h +++ b/src/coreclr/jit/inline.h @@ -726,6 +726,18 @@ class InlineContext return m_Parent; } + // Get the sibling context. + InlineContext* GetSibling() const + { + return m_Sibling; + } + + // Get the first child context. + InlineContext* GetChild() const + { + return m_Child; + } + // Get the code pointer for this context. 
const BYTE* GetCode() const { @@ -806,7 +818,6 @@ class InlineContext private: InlineContext(InlineStrategy* strategy); -private: InlineStrategy* m_InlineStrategy; // overall strategy InlineContext* m_Parent; // logical caller (parent) InlineContext* m_Child; // first child diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 13873e28f89e9b..eb289a8f8d1445 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -226,6 +226,8 @@ CONFIG_INTEGER(JitDumpFgConstrained, W("JitDumpFgConstrained"), 1) // 0 == don't CONFIG_INTEGER(JitDumpFgBlockID, W("JitDumpFgBlockID"), 0) // 0 == display block with bbNum; 1 == display with both // bbNum and bbID +CONFIG_STRING(JitDumpPreciseDebugInfoFile, W("JitDumpPreciseDebugInfoFile")) + CONFIG_STRING(JitLateDisasmTo, W("JITLateDisasmTo")) CONFIG_STRING(JitRange, W("JitRange")) CONFIG_STRING(JitStressModeNames, W("JitStressModeNames")) // Internal Jit stress mode: stress using the given set of diff --git a/src/coreclr/tools/Common/TypeSystem/IL/FlowGraph.cs b/src/coreclr/tools/Common/TypeSystem/IL/FlowGraph.cs index 7fd2bb8cba2558..a4791620331f51 100644 --- a/src/coreclr/tools/Common/TypeSystem/IL/FlowGraph.cs +++ b/src/coreclr/tools/Common/TypeSystem/IL/FlowGraph.cs @@ -88,47 +88,6 @@ public IEnumerable LookupRange(int ilOffsetStart, int ilOffsetEnd) yield return BasicBlocks[i]; } - internal string Dump(Func getNodeAnnot, Func<(BasicBlock, BasicBlock), string> getEdgeAnnot) - { - var sb = new StringBuilder(); - sb.AppendLine("digraph G {"); - sb.AppendLine(" forcelabels=true;"); - sb.AppendLine(); - Dictionary bbToIndex = new Dictionary(); - for (int i = 0; i < BasicBlocks.Count; i++) - bbToIndex.Add(BasicBlocks[i].Start, i); - - foreach (BasicBlock bb in BasicBlocks) - { - string label = $"[{bb.Start:x}..{bb.Start + bb.Size:x})\\n{getNodeAnnot(bb)}"; - sb.AppendLine($" BB{bbToIndex[bb.Start]} [label=\"{label}\"];"); - } - - sb.AppendLine(); - - foreach (BasicBlock 
bb in BasicBlocks) - { - foreach (BasicBlock tar in bb.Targets) - { - string label = getEdgeAnnot((bb, tar)); - string postfix = string.IsNullOrEmpty(label) ? "" : $" [label=\"{label}\"]"; - sb.AppendLine($" BB{bbToIndex[bb.Start]} -> BB{bbToIndex[tar.Start]}{postfix};"); - } - } - - // Write ranks with BFS. - List curRank = new List { BasicBlocks.Single(bb => bb.Start == 0) }; - HashSet seen = new HashSet(curRank); - while (curRank.Count > 0) - { - sb.AppendLine($" {{rank = same; {string.Concat(curRank.Select(bb => $"BB{bbToIndex[bb.Start]}; "))}}}"); - curRank = curRank.SelectMany(bb => bb.Targets).Where(seen.Add).ToList(); - } - - sb.AppendLine("}"); - return sb.ToString(); - } - public static FlowGraph Create(MethodIL il) { HashSet bbStarts = GetBasicBlockStarts(il); diff --git a/src/coreclr/tools/dotnet-pgo/CommandLineOptions.cs b/src/coreclr/tools/dotnet-pgo/CommandLineOptions.cs index a2e3a82f37fdb6..f460a5f49817f3 100644 --- a/src/coreclr/tools/dotnet-pgo/CommandLineOptions.cs +++ b/src/coreclr/tools/dotnet-pgo/CommandLineOptions.cs @@ -18,6 +18,7 @@ internal class CommandLineOptions public FileInfo TraceFile; public FileInfo OutputFileName; + public FileInfo PreciseDebugInfoFile; public int? Pid; public string ProcessName; public PgoFileType? 
FileType; @@ -29,8 +30,7 @@ internal class CommandLineOptions public bool ValidateOutputFile; public bool GenerateCallGraph; public bool Spgo; - public bool SpgoIncludeBlockCounts; - public bool SpgoIncludeEdgeCounts; + public bool IncludeFullGraphs; public int SpgoMinSamples = 50; public bool VerboseWarnings; public jittraceoptions JitTraceOptions; @@ -45,6 +45,8 @@ internal class CommandLineOptions public bool DumpMibc = false; public FileInfo InputFileToDump; public List CompareMibc; + public DirectoryInfo DumpWorstOverlapGraphsTo; + public int DumpWorstOverlapGraphs = -1; public bool InheritTimestamp; public string[] HelpArgs = Array.Empty(); @@ -196,13 +198,15 @@ void HelpOption() CommonOptions(); CompressedOption(); + string preciseDebugInfoFile = null; + syntax.DefineOption(name: "precise-debug-info-file", ref preciseDebugInfoFile, "Name of file of newline separated JSON objects containing precise debug info"); + if (preciseDebugInfoFile != null) + PreciseDebugInfoFile = new FileInfo(preciseDebugInfoFile); + syntax.DefineOption(name: "spgo", value: ref Spgo, help: "Base profile on samples in the input. Uses last branch records if available and otherwise raw IP samples.", requireValue: false); - syntax.DefineOption(name: "spgo-with-block-counts", value: ref SpgoIncludeBlockCounts, help: "Include block counts in the written .mibc file. If neither this nor spgo-with-edge-counts are specified, then defaults to true.", requireValue: false); - syntax.DefineOption(name: "spgo-with-edge-counts", value: ref SpgoIncludeEdgeCounts, help: "Include edge counts in the written .mibc file.", requireValue: false); syntax.DefineOption(name: "spgo-min-samples", value: ref SpgoMinSamples, help: $"The minimum number of total samples a function must have before generating profile data for it with SPGO. 
Default: {SpgoMinSamples}", requireValue: false); - if (!SpgoIncludeBlockCounts && !SpgoIncludeEdgeCounts) - SpgoIncludeBlockCounts = true; + syntax.DefineOption(name: "include-full-graphs", value: ref IncludeFullGraphs, help: "Include all blocks and edges in the written .mibc file, regardless of profile counts", requireValue: false); HelpOption(); } @@ -305,6 +309,12 @@ void HelpOption() CompareMibc = DefineFileOptionList(name: "i|input", help: "The input .mibc files to be compared. Specify as --input file1.mibc --input file2.mibc"); if (CompareMibc.Count != 2) Help = true; + + syntax.DefineOption(name: "dump-worst-overlap-graphs", value: ref DumpWorstOverlapGraphs, help: "Number of graphs to dump to .dot format in dump-worst-overlap-graphs-to directory"); + string dumpWorstOverlapGraphsTo = null; // Stays null unless --dump-worst-overlap-graphs-to is given. + syntax.DefineOption(name: "dump-worst-overlap-graphs-to", value: ref dumpWorstOverlapGraphsTo, help: "Directory to dump the worst-overlap flow graphs to, in .dot format"); + if (dumpWorstOverlapGraphsTo != null) + DumpWorstOverlapGraphsTo = new DirectoryInfo(dumpWorstOverlapGraphsTo); } if (syntax.ActiveCommand == null) diff --git a/src/coreclr/tools/dotnet-pgo/MethodMemoryMap.cs b/src/coreclr/tools/dotnet-pgo/MethodMemoryMap.cs index 0a5074d0cf2435..e502bff6f027fe 100644 --- a/src/coreclr/tools/dotnet-pgo/MethodMemoryMap.cs +++ b/src/coreclr/tools/dotnet-pgo/MethodMemoryMap.cs @@ -8,7 +8,10 @@ using System.IO; using System.Linq; using System.Text; +using System.Text.Json; +using System.Text.Json.Serialization; using System.Threading.Tasks; +using ILCompiler.Reflection.ReadyToRun; using Internal.TypeSystem; using Microsoft.Diagnostics.Tracing.Etlx; using Microsoft.Diagnostics.Tracing.Parsers.Clr; @@ -48,6 +51,7 @@ public MethodMemoryMap( TraceTypeSystemContext tsc, TraceRuntimeDescToTypeSystemDesc idParser, int clrInstanceID, + string preciseDebugInfoFile, Logger logger) { // Capture the addresses of jitted code @@ -145,7 +149,7 @@ public 
MethodMemoryMap( StartAddress = module.ImageBase + (ulong)runtimeFunction.StartAddress, EndAddress = module.ImageBase + (ulong)runtimeFunction.StartAddress + (uint)runtimeFunction.Size, Method = methodEntry.Key, - NativeToILMap = runtimeFunction.DebugInfo != null ? NativeToILMap.FromR2RBounds(runtimeFunction.DebugInfo.BoundsList) : null, + NativeToILMap = runtimeFunction.DebugInfo != null ? CreateNativeToILMap(methodEntry.Key, runtimeFunction.DebugInfo.BoundsList) : null, }); } } @@ -156,11 +160,31 @@ public MethodMemoryMap( } } - // Associate NativeToILMap with MethodLoad event found Memory Regions - foreach (MethodILToNativeMapTraceData e in p.EventsInProcess.ByEventType()) + List preciseInfos = null; + if (File.Exists(preciseDebugInfoFile)) { - if (info.TryGetValue(new JittedID(e.MethodID, e.ReJITID), out MemoryRegionInfo inf)) - inf.NativeToILMap = NativeToILMap.FromEvent(e); + preciseInfos = + File.ReadAllLines(preciseDebugInfoFile) + .Select(l => JsonSerializer.Deserialize(l)) + .ToList(); + } + + if (preciseInfos != null && preciseInfos.Count > 0) + { + foreach (PreciseDebugInfo preciseDebugInf in preciseInfos) + { + if (info.TryGetValue(new JittedID((long)preciseDebugInf.MethodID, 0), out MemoryRegionInfo inf)) + inf.NativeToILMap = CreateNativeToILMap(idParser, preciseDebugInf); + } + } + else + { + // Associate NativeToILMap with MethodLoad event found Memory Regions + foreach (MethodILToNativeMapTraceData e in p.EventsInProcess.ByEventType()) + { + if (info.TryGetValue(new JittedID(e.MethodID, e.ReJITID), out MemoryRegionInfo inf)) + inf.NativeToILMap = CreateNativeToILMap(inf.Method, e); + } } // Sort the R2R data by StartAddress @@ -221,13 +245,60 @@ private class MemoryRegionInfoStartAddressComparer : IComparer { int IComparer.Compare(MemoryRegionInfo x, MemoryRegionInfo y) => x.StartAddress.CompareTo(y.StartAddress); } + + private static KeyValueMap CreateNativeToILMap(MethodDesc method, List boundsList) + { + List sorted = boundsList.OrderBy(e => 
e.NativeOffset).ToList(); + + return new(sorted.Select(e => e.NativeOffset).ToArray(), sorted.Select(e => new IPMapping((int)e.ILOffset, null, method)).ToArray()); + } + + private static KeyValueMap CreateNativeToILMap(MethodDesc method, MethodILToNativeMapTraceData ev) + { + List<(uint rva, int ilOffset)> pairs = new List<(uint rva, int ilOffset)>(ev.CountOfMapEntries); + for (int i = 0; i < ev.CountOfMapEntries; i++) + pairs.Add(((uint)ev.NativeOffset(i), ev.ILOffset(i))); + + pairs.RemoveAll(p => p.ilOffset < 0); + pairs.Sort((p1, p2) => p1.rva.CompareTo(p2.rva)); + return new(pairs.Select(p => p.rva).ToArray(), pairs.Select(p => new IPMapping(p.ilOffset, null, method)).ToArray()); + } + + private static KeyValueMap CreateNativeToILMap(TraceRuntimeDescToTypeSystemDesc idParser, PreciseDebugInfo inf) + { + Dictionary byOrdinal = new(); + AddSubTree(inf.InlineTree); + + void AddSubTree(InlineContext ctx) + { + MethodDesc md = idParser.ResolveMethodID((long)ctx.MethodID, false); + byOrdinal.Add(ctx.Ordinal, (ctx, md)); + + foreach (var child in ctx.Inlinees) + AddSubTree(child); + } + + var ordered = inf.Mappings.OrderBy(m => m.NativeOffset).ToList(); + IPMapping CreateMapping(PreciseIPMapping preciseMapping) + { + (InlineContext ctx, MethodDesc md) = byOrdinal[preciseMapping.InlineContext]; + return new IPMapping(checked((int)preciseMapping.ILOffset), ctx, md); + } + + return new(ordered.Select(p => p.NativeOffset).ToArray(), ordered.Select(CreateMapping).ToArray()); + } } - public class MemoryRegionInfo + internal class MemoryRegionInfo { public ulong StartAddress { get; set; } public ulong EndAddress { get; set; } public MethodDesc Method { get; set; } - public NativeToILMap NativeToILMap { get; set; } + public KeyValueMap NativeToILMap { get; set; } } + + internal record struct IPMapping( + int ILOffset, + InlineContext InlineContext, + MethodDesc InlineeMethod); } diff --git a/src/coreclr/tools/dotnet-pgo/PgoCompareMethodFlowGraph.cs 
b/src/coreclr/tools/dotnet-pgo/PgoCompareMethodFlowGraph.cs new file mode 100644 index 00000000000000..af55de3e9f747d --- /dev/null +++ b/src/coreclr/tools/dotnet-pgo/PgoCompareMethodFlowGraph.cs @@ -0,0 +1,292 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using ILCompiler; +using Internal.Pgo; + +namespace Microsoft.Diagnostics.Tools.Pgo +{ + internal class PgoCompareMethodFlowGraph + { + public bool ProfilesHadBasicBlocks { get; init; } + public bool ProfilesHadEdges { get; init; } + public PgoCompareMethodBasicBlock EntryBasicBlock { get; init; } + public List BasicBlocks { get; init; } + + public long TotalBlockCount1 => BasicBlocks.Sum(bb => bb.BlockCount1); + public long TotalBlockCount2 => BasicBlocks.Sum(bb => bb.BlockCount2); + public long TotalEdgeCount1 => BasicBlocks.Sum(bb => bb.Edges.Sum(e => e.Value.Count1)); + public long TotalEdgeCount2 => BasicBlocks.Sum(bb => bb.Edges.Sum(e => e.Value.Count2)); + + public double ComputeBlockOverlap() + { + long total1 = TotalBlockCount1; + long total2 = TotalBlockCount2; + + if (total1 == 0 && total2 == 0) + return 1; + + if (total1 == 0 || total2 == 0) + return 0; + + double overlap = + BasicBlocks + .Sum(bb => Math.Min(bb.BlockCount1 / (double)total1, bb.BlockCount2 / (double)total2)); + return overlap; + } + + public double ComputeEdgeOverlap() + { + long total1 = TotalEdgeCount1; + long total2 = TotalEdgeCount2; + + if (total1 == 0 && total2 == 0) + return 1; + + if (total1 == 0 || total2 == 0) + return 0; + + double overlap = + BasicBlocks + .Sum(bb => bb.Edges.Values.Sum(e => Math.Min(e.Count1 / (double)total1, e.Count2 / (double)total2))); + return overlap; + } + + public string Dump(string title) + { + long totalBlockCount1 = TotalBlockCount1; + long totalBlockCount2 = TotalBlockCount2; + 
+ string createWeightLabel(long weight1, long totalWeight1, long weight2, long totalWeight2) + { + string label = ""; + if (totalWeight1 == 0) + { + label += "N/A"; + } + else + { + double pw = weight1 / (double)totalWeight1; + label += $"{pw * 100:F2}%"; + } + + label += " vs "; + if (totalWeight2 == 0) + { + label += "N/A"; + } + else + { + double pw = weight2 / (double)totalWeight2; + label += $"{pw * 100:F2}%"; + } + + return label; + } + + string getLabel(PgoCompareMethodBasicBlock bb) + { + string label = $"@ {bb.ILOffset:000}"; + if (ProfilesHadBasicBlocks && (totalBlockCount1 != 0 || totalBlockCount2 != 0)) + { + label += $"\\n{createWeightLabel(bb.BlockCount1, totalBlockCount1, bb.BlockCount2, totalBlockCount2)}"; + } + + return label; + } + + long totalEdgeCount1 = TotalEdgeCount1; + long totalEdgeCount2 = TotalEdgeCount2; + + string getEdgeLabel((PgoCompareMethodBasicBlock from, PgoCompareMethodBasicBlock to) edge) + { + if (!ProfilesHadEdges) + return ""; + + (long weight1, long weight2) = edge.from.Edges[edge.to]; + return createWeightLabel(weight1, totalEdgeCount1, weight2, totalEdgeCount2); + } + + string dot = + FlowGraphHelper.DumpGraph( + BasicBlocks, EntryBasicBlock, + bb => new HashSet(bb.Edges.Keys), + title, + getLabel, + getEdgeLabel); + return dot; + } + + public static PgoCompareMethodFlowGraph Create( + MethodProfileData profile1, + string name1, + MethodProfileData profile2, + string name2, + out List errors) + { + errors = new List(); + if (profile1?.SchemaData == null) + { + errors.Add($"Profile data missing from {name1}"); + return null; + } + if (profile2?.SchemaData == null) + { + errors.Add($"Profile data missing from {name2}"); + return null; + } + + var (blocks1, blocks2) = (GroupBlocks(profile1), GroupBlocks(profile2)); + var (edges1, edges2) = (GroupEdges(profile1), GroupEdges(profile2)); + bool hasBlocks1 = blocks1.Count != 0; + bool hasBlocks2 = blocks2.Count != 0; + bool hasEdges1 = edges1.Count != 0; + bool hasEdges2 = 
edges2.Count != 0; + if (!hasBlocks1 && !hasBlocks2 && !hasEdges1 && !hasEdges2) + { + errors.Add($"No profile data present in either {name1} or {name2}"); + return null; + } + + bool hasComparableProfileData = + (hasBlocks1 && hasBlocks2) || + (hasEdges1 && hasEdges2); + + if (!hasComparableProfileData) + { + errors.Add($"No comparable profile data present"); + return null; + } + + if (hasBlocks1 && hasBlocks2) + { + var in1 = blocks1.Keys.Where(k => !blocks2.ContainsKey(k)).ToList(); + var in2 = blocks2.Keys.Where(k => !blocks1.ContainsKey(k)).ToList(); + + foreach (var m1 in in1) + errors.Add($"{name1} has a block at {m1:x} not present in {name2}"); + foreach (var m2 in in2) + errors.Add($"{name2} has a block at {m2:x} not present in {name1}"); + } + + if (hasEdges1 && hasEdges2) + { + var in1 = edges1.Keys.Where(k => !edges2.ContainsKey(k)).ToList(); + var in2 = edges2.Keys.Where(k => !edges1.ContainsKey(k)).ToList(); + + foreach (var (from, to) in in1) + errors.Add($"{name1} has an edge {from:x}->{to:x} not present in {name2}"); + foreach (var (from, to) in in2) + errors.Add($"{name2} has an edge {from:x}->{to:x} not present in {name1}"); + } + + if (errors.Count > 0) + { + // Do not continue if flow graphs do not match + return null; + } + + // Note: We permit missing data in one of the two profiles (e.g. + // instrumentation will typically not contain edges if we asked for + // BBs, but we can still compare with SPGO with arg + // --include-full-graphs this way). + + Dictionary ilToBB = new(); + foreach ((int ilOffs, _) in hasBlocks1 ? blocks1 : blocks2) + { + ilToBB.Add( + ilOffs, + new PgoCompareMethodBasicBlock + { + ILOffset = ilOffs, + BlockCount1 = blocks1.TryGetValue(ilOffs, out PgoSchemaElem elem) ? elem.DataLong : 0, + BlockCount2 = blocks2.TryGetValue(ilOffs, out elem) ? elem.DataLong : 0, + }); + } + + foreach (((int ilFrom, int ilTo), _) in hasEdges1 ? 
edges1 : edges2) + { + if (!ilToBB.TryGetValue(ilFrom, out PgoCompareMethodBasicBlock bbFrom)) + { + if (hasBlocks1 || hasBlocks2) + { + errors.Add($"There is an edge from {ilFrom} -> {ilTo}, but no basic block found at {ilFrom}"); + } + else + { + // If we have no BBs at all use the edges to construct BBs. + ilToBB.Add(ilFrom, bbFrom = new PgoCompareMethodBasicBlock + { + ILOffset = ilFrom + }); + } + } + + if (!ilToBB.TryGetValue(ilTo, out PgoCompareMethodBasicBlock bbTo)) + { + if (hasBlocks1 || hasBlocks2) + { + errors.Add($"There is an edge from {ilFrom} -> {ilTo}, but no basic block found at {ilTo}"); + } + else + { + // If we have no BBs at all use the edges to construct BBs. + ilToBB.Add(ilTo, bbTo = new PgoCompareMethodBasicBlock + { + ILOffset = ilTo + }); + } + } + + long edgeCount1 = edges1.TryGetValue((ilFrom, ilTo), out PgoSchemaElem elem) ? elem.DataLong : 0; + long edgeCount2 = edges2.TryGetValue((ilFrom, ilTo), out elem) ? elem.DataLong : 0; + if (bbFrom != null && bbTo != null) // A missing endpoint was already recorded in errors; skip the edge instead of dereferencing null. + bbFrom.Edges.Add(bbTo, (edgeCount1, edgeCount2)); + } + + if (!ilToBB.TryGetValue(0, out PgoCompareMethodBasicBlock entryBasicBlock)) + errors.Add("No entry block found"); + if (errors.Count > 0) // Covers both a missing entry block and edges that point at unknown blocks. + return null; + + return new PgoCompareMethodFlowGraph + { + BasicBlocks = ilToBB.Values.ToList(), + EntryBasicBlock = entryBasicBlock, + ProfilesHadBasicBlocks = hasBlocks1 && hasBlocks2, + ProfilesHadEdges = hasEdges1 && hasEdges2, + }; + } + + private static Dictionary GroupBlocks(MethodProfileData data) + { + return data.SchemaData + .Where(e => e.InstrumentationKind == PgoInstrumentationKind.BasicBlockIntCount || e.InstrumentationKind == PgoInstrumentationKind.BasicBlockLongCount) + .ToDictionary(e => e.ILOffset); + } + + private static Dictionary<(int, int), PgoSchemaElem> GroupEdges(MethodProfileData data) + { + return data.SchemaData + .Where(e => e.InstrumentationKind == PgoInstrumentationKind.EdgeIntCount || e.InstrumentationKind == PgoInstrumentationKind.EdgeLongCount) + .ToDictionary(e => (e.ILOffset, e.Other)); + } + } 
+ + internal sealed class PgoCompareMethodBasicBlock + { + public int ILOffset { get; init; } + public long BlockCount1 { get; init; } + public long BlockCount2 { get; init; } + + public Dictionary Edges { get; } = new(); + + public override bool Equals(object obj) => obj is PgoCompareMethodBasicBlock block && ILOffset == block.ILOffset; + public override int GetHashCode() => HashCode.Combine(ILOffset); + } +} diff --git a/src/coreclr/tools/dotnet-pgo/Program.cs b/src/coreclr/tools/dotnet-pgo/Program.cs index 4d21459717d028..5100bac27e6d71 100644 --- a/src/coreclr/tools/dotnet-pgo/Program.cs +++ b/src/coreclr/tools/dotnet-pgo/Program.cs @@ -439,7 +439,7 @@ static int InnerCompareMibcMain(CommandLineOptions options) PrintOutput($"# Profiled methods in {name1} not in {name2}: {profiledMethods1.Select(m => m.Method).Except(profiledMethods2.Select(m => m.Method)).Count()}"); PrintOutput($"# Profiled methods in {name2} not in {name1}: {profiledMethods2.Select(m => m.Method).Except(profiledMethods1.Select(m => m.Method)).Count()}"); PrintOutput($"# Methods with profile data in both .mibc files: {profiledMethods1.Select(m => m.Method).Intersect(profiledMethods2.Select(m => m.Method)).Count()}"); - var fgMatches = new List<(MethodProfileData prof1, MethodProfileData prof2)>(); + var fgMatches = new Dictionary(); var fgMismatches = new List<(MethodProfileData prof1, MethodProfileData prof2, List mismatches)>(); foreach (MethodProfileData prof1 in profiledMethods1) @@ -448,36 +448,15 @@ static int InnerCompareMibcMain(CommandLineOptions options) if (prof2?.SchemaData == null) continue; - var (blocks1, blocks2) = (GroupBlocks(prof1), GroupBlocks(prof2)); - var (edges1, edges2) = (GroupEdges(prof1), GroupEdges(prof2)); - - List mismatches = new List(); - if (blocks1.Count > 0 && blocks2.Count > 0) + PgoCompareMethodFlowGraph graph = PgoCompareMethodFlowGraph.Create(prof1, name1, prof2, name2, out var errors); + if (graph != null) { - var in1 = blocks1.Keys.Where(k => 
!blocks2.ContainsKey(k)).ToList(); - var in2 = blocks2.Keys.Where(k => !blocks1.ContainsKey(k)).ToList(); - - foreach (var m1 in in1) - mismatches.Add($"{name1} has a block at {m1:x} not present in {name2}"); - foreach (var m2 in in2) - mismatches.Add($"{name2} has a block at {m2:x} not present in {name1}"); + fgMatches.Add(prof1.Method, graph); } - - if (edges1.Count > 0 && edges2.Count > 0) + else { - var in1 = edges1.Keys.Where(k => !edges2.ContainsKey(k)).ToList(); - var in2 = edges2.Keys.Where(k => !edges1.ContainsKey(k)).ToList(); - - foreach (var (from, to) in in1) - mismatches.Add($"{name1} has an edge {from:x}->{to:x} not present in {name2}"); - foreach (var (from, to) in in2) - mismatches.Add($"{name2} has an edge {from:x}->{to:x} not present in {name1}"); + fgMismatches.Add((prof1, prof2, errors)); } - - if (mismatches.Count > 0) - fgMismatches.Add((prof1, prof2, mismatches)); - else - fgMatches.Add((prof1, prof2)); } PrintOutput($" Of these, {fgMatches.Count} have matching flow-graphs and the remaining {fgMismatches.Count} do not"); @@ -502,37 +481,13 @@ static int InnerCompareMibcMain(CommandLineOptions options) var blockOverlaps = new List<(MethodDesc Method, double Overlap)>(); var edgeOverlaps = new List<(MethodDesc Method, double Overlap)>(); - foreach ((MethodProfileData prof1, MethodProfileData prof2) in fgMatches) + foreach ((MethodDesc method, PgoCompareMethodFlowGraph fg) in fgMatches) { - var (blocks1, blocks2) = (GroupBlocks(prof1), GroupBlocks(prof2)); - var (edges1, edges2) = (GroupEdges(prof1), GroupEdges(prof2)); + if (fg.ProfilesHadBasicBlocks) + blockOverlaps.Add((method, fg.ComputeBlockOverlap())); - double Overlap(Dictionary left, Dictionary right) - { - long leftTotal = left.Values.Sum(e => e.DataLong); - long rightTotal = right.Values.Sum(e => e.DataLong); - Debug.Assert(left.Keys.All(k => right.ContainsKey(k))); - Debug.Assert(right.Keys.All(k => left.ContainsKey(k))); - - if (leftTotal == 0 && rightTotal == 0) - return 1; - - if 
(leftTotal == 0 || rightTotal == 0) - return 0; - - var leftPW = left.ToDictionary(k => k.Key, k => k.Value.DataLong / (double)leftTotal); - var rightPW = right.ToDictionary(k => k.Key, k => k.Value.DataLong / (double)rightTotal); - - double overlap = leftPW.Sum(k => Math.Min(k.Value, rightPW[k.Key])); - return overlap; - } - - Debug.Assert(prof1.Method == prof2.Method); - if (blocks1.Count > 0 && blocks2.Count > 0) - blockOverlaps.Add((prof1.Method, Overlap(blocks1, blocks2))); - - if (edges1.Count > 0 && edges2.Count > 0) - edgeOverlaps.Add((prof1.Method, Overlap(edges1, edges2))); + if (fg.ProfilesHadEdges) + edgeOverlaps.Add((method, fg.ComputeEdgeOverlap())); } void PrintHistogram(List<(MethodDesc Method, double Overlap)> overlaps) @@ -614,8 +569,11 @@ void PrintBar(string label, ref int curIndex, Func include, bool f int devirtToSame = 0; int devirtToSameLikelihood100 = 0; int devirtToSameLikelihood70 = 0; - foreach ((MethodProfileData prof1, MethodProfileData prof2) in fgMatches) + foreach ((MethodDesc method, PgoCompareMethodFlowGraph fg) in fgMatches) { + MethodProfileData prof1 = profile1.GetMethodProfileData(method); + MethodProfileData prof2 = profile2.GetMethodProfileData(method); + List typeHandleHistogramCallSites = prof1.SchemaData.Concat(prof2.SchemaData) .Where(e => e.InstrumentationKind == PgoInstrumentationKind.GetLikelyClass || e.InstrumentationKind == PgoInstrumentationKind.TypeHandleHistogramTypeHandle) @@ -662,6 +620,31 @@ string FormatDevirt(GetLikelyClassResult result) PrintOutput($" At +{change.ilOffset:x}: {FormatDevirt(change.result1)} vs {FormatDevirt(change.result2)}"); } } + + if (options.DumpWorstOverlapGraphsTo != null) + { + IEnumerable<(MethodDesc Method, double Overlap)> toDump; + if (options.DumpWorstOverlapGraphs == -1) + toDump = blockOverlaps.OrderBy(t => t.Overlap).TakeWhile(t => t.Overlap < 0.5); + else + toDump = blockOverlaps.OrderBy(t => t.Overlap).Take(options.DumpWorstOverlapGraphs); + + foreach ((MethodDesc method, 
double overlap) in toDump) + { + PgoCompareMethodFlowGraph fg = fgMatches[method]; + + string title = $"Flowgraph for {method}\\n{name1} vs {name2}"; + if (fg.ProfilesHadBasicBlocks) + title += $"\\nBasic block counts: {fg.TotalBlockCount1} vs {fg.TotalEdgeCount2}"; + if (fg.ProfilesHadEdges) + title += $"\\Edge counts: {fg.TotalEdgeCount1} vs {fg.TotalEdgeCount2}"; + + string dot = fg.Dump(title); + + string fileName = DebugNameFormatter.Instance.FormatName(method.OwningType, DebugNameFormatter.FormatOptions.NamespaceQualify) + "." + method.DiagnosticName; + File.WriteAllText(Path.Combine(options.DumpWorstOverlapGraphsTo.FullName, fileName + ".dot"), dot); + } + } } return 0; @@ -808,8 +791,8 @@ static bool CountersSumToZero(MethodProfileData data) List methodsWithZeroCounters = profiledMethods.Where(CountersSumToZero).ToList(); if (methodsWithZeroCounters.Count > 0) { - PrintOutput($"There are {methodsWithZeroCounters.Count} methods whose counters sum to 0:"); - foreach (MethodProfileData mpd in methodsWithZeroCounters) + PrintOutput($"There are {methodsWithZeroCounters.Count} methods whose counters sum to 0{(methodsWithZeroCounters.Count > 10 ? 
" (10 shown)" : "")}:"); + foreach (MethodProfileData mpd in methodsWithZeroCounters.Take(10)) PrintOutput($" {mpd.Method}"); } @@ -915,20 +898,6 @@ static bool IsUnknownTypeHandle(int handle) return new GetLikelyClassResult { IsNull = true }; } - private static Dictionary GroupBlocks(MethodProfileData data) - { - return data.SchemaData - .Where(e => e.InstrumentationKind == PgoInstrumentationKind.BasicBlockIntCount || e.InstrumentationKind == PgoInstrumentationKind.BasicBlockLongCount) - .ToDictionary(e => e.ILOffset); - } - - private static Dictionary<(int, int), PgoSchemaElem> GroupEdges(MethodProfileData data) - { - return data.SchemaData - .Where(e => e.InstrumentationKind == PgoInstrumentationKind.EdgeIntCount || e.InstrumentationKind == PgoInstrumentationKind.EdgeLongCount) - .ToDictionary(e => (e.ILOffset, e.Other)); - } - static int InnerProcessTraceFileMain(CommandLineOptions commandLineOptions) { if (commandLineOptions.TraceFile == null) @@ -1003,9 +972,9 @@ static int InnerProcessTraceFileMain(CommandLineOptions commandLineOptions) { PrintError("Trace file contains multiple processes to distinguish between"); PrintOutput("Either a pid or process name from the following list must be specified"); - foreach (TraceProcess proc in traceLog.Processes) + foreach (TraceProcess proc in traceLog.Processes.OrderByDescending(proc => proc.CPUMSec)) { - PrintOutput($"Procname = {proc.Name} Pid = {proc.ProcessID}"); + PrintOutput($"Procname = {proc.Name} Pid = {proc.ProcessID} CPUMsec = {proc.CPUMSec:F0}"); } return 1; } @@ -1321,6 +1290,7 @@ MethodMemoryMap GetMethodMemMap() tsc, idParser, clrInstanceId.Value, + commandLineOptions.PreciseDebugInfoFile?.FullName, s_logger); } @@ -1451,84 +1421,31 @@ void AddToInstrumentationData(int eventClrInstanceId, long methodID, int methodF } } - Dictionary sampleProfiles = new Dictionary(); + SampleCorrelator correlator = null; if (commandLineOptions.Spgo) { - MethodMemoryMap mmap = GetMethodMemMap(); - Dictionary ils = new 
Dictionary(); - Dictionary flowGraphs = new Dictionary(); - - MethodIL GetMethodIL(MethodDesc desc) - { - if (!ils.TryGetValue(desc, out MethodIL il)) - { - il = desc switch - { - EcmaMethod em => EcmaMethodIL.Create(em), - var m => new InstantiatedMethodIL(m, EcmaMethodIL.Create((EcmaMethod)m.GetTypicalMethodDefinition())), - }; - - ils.Add(desc, il); - } - - return il; - } - - FlowGraph GetFlowGraph(MethodDesc desc) - { - if (!flowGraphs.TryGetValue(desc, out FlowGraph fg)) - { - flowGraphs.Add(desc, fg = FlowGraph.Create(GetMethodIL(desc))); - } - - return fg; - } + correlator = new SampleCorrelator(GetMethodMemMap()); Guid lbrGuid = Guid.Parse("99134383-5248-43fc-834b-529454e75df3"); bool hasLbr = traceLog.Events.Any(e => e.TaskGuid == lbrGuid); if (!hasLbr) { - // No LBR data, use standard IP samples. First convert each sample to a tuple of (Method, raw IP, IL offset). - (MethodDesc Method, ulong IP, int Offset) GetTuple(SampledProfileTraceData e) + foreach (SampledProfileTraceData e in p.EventsInProcess.ByEventType()) { - MemoryRegionInfo info = mmap.GetInfo(e.InstructionPointer); - if (info == null) - return (null, e.InstructionPointer, -1); - - int offset = info.NativeToILMap?.Lookup(checked((uint)(e.InstructionPointer - info.StartAddress))) ?? -1; - return (info.Method, e.InstructionPointer, offset); + correlator.AttributeSamplesToIP(e.InstructionPointer, 1); } - var samples = - p.EventsInProcess.ByEventType() - .Select(GetTuple) - .Where(t => t.Method != null && t.Offset >= 0) - .ToList(); - - // Now find all samples in each method. - foreach (var g in samples.GroupBy(t => t.Method)) - { - // SPGO is quite sensitive with low counts, so check if we should not generate SPGO data for this function. 
- if (g.Count() < commandLineOptions.SpgoMinSamples) - continue; - - MethodIL il = GetMethodIL(g.Key); - SampleProfile sp = SampleProfile.Create(il, GetFlowGraph(g.Key), g.Select(t => t.Offset)); - sampleProfiles.Add(g.Key, sp); - } - - PrintOutput($"Profile is based on {samples.Count} samples"); + PrintOutput($"Samples outside managed code: {correlator.SamplesOutsideManagedCode}"); + PrintOutput($"Samples in managed code that does not have native<->IL mappings: {correlator.SamplesInManagedCodeWithoutAnyMappings}"); + PrintOutput($"Samples in managed code with mappings that could not be correlated: {correlator.SamplesInManagedCodeOutsideMappings}"); + PrintOutput($"Samples in inlinees that were not present in ETW events: {correlator.SamplesInUnknownInlinees}"); + PrintOutput($"Samples in managed code for which we could not get the IL: {correlator.SamplesInManagedCodeWithoutIL}"); + PrintOutput($"Samples in managed code that could not be attributed to the method's flow graph: {correlator.SamplesInManagedCodeOutsideFlowGraph}"); + PrintOutput($"Samples successfully attributed: {correlator.TotalAttributedSamples}"); } else { - // We have LBR data. We use the LBR data to collect straight-line runs that the CPU did in this process inside managed methods. - // That is, if we first see a branch from A -> B followed by a branch from C -> D, then we can conclude that the CPU executed - // code from B -> C. We call this a 'run' and collect each run and its multiplicity. - // Later, we will find all IL offsets on this path and assign samples to the distinct basic blocks corresponding to those IL offsets. 
- Dictionary<(ulong startRun, ulong endRun), long> runs = new Dictionary<(ulong startRun, ulong endRun), long>(); - List<(ulong start, ulong end)> lbrRuns = new List<(ulong start, ulong end)>(); - LbrEntry64[] lbr64Arr = null; long numLbrRecords = 0; foreach (var e in traceLog.Events) { @@ -1543,7 +1460,6 @@ FlowGraph GetFlowGraph(MethodDesc desc) unsafe { - Span lbr; if (traceLog.PointerSize == 4) { // For 32-bit machines we convert the data into a 64-bit format first. @@ -1552,18 +1468,7 @@ FlowGraph GetFlowGraph(MethodDesc desc) continue; Span lbr32 = data->Entries(e.EventDataLength); - if (lbr64Arr == null || lbr64Arr.Length < lbr32.Length) - lbr64Arr = new LbrEntry64[lbr32.Length]; - - for (int i = 0; i < lbr32.Length; i++) - { - ref LbrEntry64 entry = ref lbr64Arr[i]; - entry.FromAddress = lbr32[i].FromAddress; - entry.ToAddress = lbr32[i].ToAddress; - entry.Reserved = lbr32[i].Reserved; - } - - lbr = lbr64Arr[0..lbr32.Length]; + correlator.AttributeSampleToLbrRuns(lbr32); } else { @@ -1576,107 +1481,16 @@ FlowGraph GetFlowGraph(MethodDesc desc) if (data->ProcessId != p.ProcessID) continue; - lbr = data->Entries(e.EventDataLength); - } - - // Store runs. LBR is chronological with most recent branches first. - // To avoid double-counting blocks containing calls when the LBR buffer contains - // both the call and the return from the call, we have to do some fancy things - // when seeing cross-function branches, so we use a temporary list of runs - // that we assign into the global dictionary. - lbrRuns.Clear(); - for (int i = lbr.Length - 2; i >= 0; i--) - { - ulong prevFrom = lbr[i + 1].FromAddress; - ulong prevTo = lbr[i + 1].ToAddress; - ulong curFrom = lbr[i].FromAddress; - MemoryRegionInfo prevFromMeth = methodMemMap.GetInfo(prevFrom); - MemoryRegionInfo prevToMeth = methodMemMap.GetInfo(prevTo); - MemoryRegionInfo curFromMeth = methodMemMap.GetInfo(curFrom); - // If this run is not in the same function then ignore it. 
- if (prevToMeth == null || prevToMeth != curFromMeth) - continue; - - // Otherwise, if this run follows right after jumping back into the function, we might need to extend - // a previous run instead. This happens if we previously did a call out of this function and now returned back. - // TODO: Handle recursion here. The same function could return to itself and we wouldn't realize it from this check. - if (prevFromMeth != prevToMeth) - { - bool extendedPrevRun = false; - // Try to find a previous run. Iterate in reverse to simulate stack behavior of calls. - FlowGraph toFG = null; - for (int j = lbrRuns.Count - 1; j >= 0; j--) - { - MemoryRegionInfo endRunMeth = methodMemMap.GetInfo(lbrRuns[j].end); - if (endRunMeth != prevToMeth) - continue; - - // Same function at least, check for same basic block - toFG ??= GetFlowGraph(endRunMeth.Method); - BasicBlock endRunBB = toFG.Lookup(endRunMeth.NativeToILMap.Lookup((uint)(lbrRuns[j].end - endRunMeth.StartAddress))); - BasicBlock toBB = toFG.Lookup(endRunMeth.NativeToILMap.Lookup((uint)(prevTo - endRunMeth.StartAddress))); - if (endRunBB == toBB && prevTo > lbrRuns[j].end) - { - // Same BB and the jump is to after where the previous run ends. Take that as a return to after that call and extend the previous run. - lbrRuns[j] = (lbrRuns[j].start, curFrom); - extendedPrevRun = true; - break; - } - } - - if (extendedPrevRun) - continue; - } - - lbrRuns.Add((prevTo, curFrom)); - } - - // Now insert runs. - foreach (var pair in lbrRuns) - { - if (runs.TryGetValue(pair, out long count)) - runs[pair] = count + 1; - else - runs.Add(pair, 1); + Span lbr64 = data->Entries(e.EventDataLength); + correlator.AttributeSampleToLbrRuns(lbr64); } } } - // Group runs by memory region info, which corresponds to each .NET method. 
- var groupedRuns = - runs - .Select(r => (start: r.Key.startRun, end: r.Key.endRun, count: r.Value, info: methodMemMap.GetInfo(r.Key.startRun))) - .GroupBy(t => t.info); - - foreach (var g in groupedRuns) - { - if (g.Key == null || g.Key.NativeToILMap == null) - continue; - - // Collect relative IPs of samples. Note that we cannot translate the end-points of runs from IPs to IL offsets - // as we cannot assume that a straight-line execution between two IPs corresponds to a straight-line execution between - // two IL offsets. SampleProfile.CreateFromLbr will be responsible for assigning samples based on the flow graph relative IPs, - // the IP<->IL mapping and the flow graph. - List<(uint start, uint end, long count)> samples = - g - .Where(t => t.end >= t.start && t.end < g.Key.EndAddress) - .Select(t => ((uint)(t.start - g.Key.StartAddress), (uint)(t.end - g.Key.StartAddress), t.count)) - .ToList(); - - if (samples.Sum(t => t.count) < commandLineOptions.SpgoMinSamples) - continue; - - SampleProfile ep = SampleProfile.CreateFromLbr( - GetMethodIL(g.Key.Method), - GetFlowGraph(g.Key.Method), - g.Key.NativeToILMap, - samples); - - sampleProfiles.Add(g.Key.Method, ep); - } - PrintOutput($"Profile is based on {numLbrRecords} LBR records"); } + + correlator.SmootheAllProfiles(); } if (commandLineOptions.DisplayProcessedEvents) @@ -1732,40 +1546,60 @@ FlowGraph GetFlowGraph(MethodDesc desc) var intDecompressor = new PgoProcessor.PgoEncodedCompressedIntParser(instrumentationData, 0); methodData.InstrumentationData = PgoProcessor.ParsePgoData(pgoDataLoader, intDecompressor, true).ToArray(); } - else if (sampleProfiles.TryGetValue(methodData.Method, out SampleProfile sp)) + else { - IEnumerable schema = Enumerable.Empty(); - - if (commandLineOptions.SpgoIncludeBlockCounts) + SampleProfile sp = correlator?.GetProfile(methodData.Method); + if (sp != null && sp.AttributedSamples >= commandLineOptions.SpgoMinSamples) { - schema = schema.Concat( + IEnumerable schema = 
sp.SmoothedSamples - .Select(kvp => + .Select( + kvp => new PgoSchemaElem { InstrumentationKind = kvp.Value > uint.MaxValue ? PgoInstrumentationKind.BasicBlockLongCount : PgoInstrumentationKind.BasicBlockIntCount, ILOffset = kvp.Key.Start, Count = 1, DataLong = kvp.Value, - })); - } + }); - if (commandLineOptions.SpgoIncludeEdgeCounts) - { - schema = schema.Concat( - sp.SmoothedEdgeSamples - .Select(kvp => - new PgoSchemaElem - { - InstrumentationKind = kvp.Value > uint.MaxValue ? PgoInstrumentationKind.EdgeLongCount : PgoInstrumentationKind.EdgeIntCount, - ILOffset = kvp.Key.Item1.Start, - Other = kvp.Key.Item2.Start, - Count = 1, - DataLong = kvp.Value - })); - } + if (commandLineOptions.IncludeFullGraphs) + { + schema = schema.Concat( + sp.SmoothedEdgeSamples + .Select(kvp => + new PgoSchemaElem + { + InstrumentationKind = kvp.Value > uint.MaxValue ? PgoInstrumentationKind.EdgeLongCount : PgoInstrumentationKind.EdgeIntCount, + ILOffset = kvp.Key.Item1.Start, + Other = kvp.Key.Item2.Start, + Count = 1, + DataLong = kvp.Value + })); + } + + methodData.InstrumentationData = schema.ToArray(); - methodData.InstrumentationData = schema.ToArray(); +#if DEBUG + if (commandLineOptions.IncludeFullGraphs) + { + var writtenBlocks = + new HashSet( + methodData.InstrumentationData + .Where(elem => elem.InstrumentationKind == PgoInstrumentationKind.BasicBlockIntCount || elem.InstrumentationKind == PgoInstrumentationKind.BasicBlockLongCount) + .Select(elem => elem.ILOffset)); + + var writtenEdges = + new HashSet<(int, int)>( + methodData.InstrumentationData + .Where(elem => elem.InstrumentationKind == PgoInstrumentationKind.EdgeIntCount || elem.InstrumentationKind == PgoInstrumentationKind.EdgeLongCount) + .Select(elem => (elem.ILOffset, elem.Other))); + + Debug.Assert(writtenBlocks.SetEquals(sp.FlowGraph.BasicBlocks.Select(bb => bb.Start))); + Debug.Assert(writtenEdges.SetEquals(sp.FlowGraph.BasicBlocks.SelectMany(bb => bb.Targets.Select(bbTar => (bb.Start, bbTar.Start))))); + 
} +#endif + } } methodsUsedInProcess.Add(methodData); diff --git a/src/coreclr/tools/dotnet-pgo/SPGO/FlowGraphHelper.cs b/src/coreclr/tools/dotnet-pgo/SPGO/FlowGraphHelper.cs new file mode 100644 index 00000000000000..ac1b24a9365129 --- /dev/null +++ b/src/coreclr/tools/dotnet-pgo/SPGO/FlowGraphHelper.cs @@ -0,0 +1,85 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Internal.IL; + +namespace Microsoft.Diagnostics.Tools.Pgo +{ + internal static class FlowGraphHelper + { + public static string DumpGraph( + List nodes, + T startNode, + Func> getSuccessors, + string title, + Func getNodeLabel, + Func<(T, T), string> getEdgeLabel) + { + var sb = new StringBuilder(); + sb.AppendLine("digraph G {"); + sb.AppendLine(" labelloc=\"t\";"); + sb.AppendLine($" label=\"{title}\";"); + sb.AppendLine(" forcelabels=true;"); + sb.AppendLine(); + Dictionary bbToIndex = new Dictionary(); + for (int i = 0; i < nodes.Count; i++) + bbToIndex.Add(nodes[i], i); + + foreach (T bb in nodes) + { + string label = $"{getNodeLabel(bb)}"; + sb.AppendLine($" N{bbToIndex[bb]} [label=\"{label}\"];"); + } + + sb.AppendLine(); + + foreach (T bb in nodes) + { + foreach (T tar in getSuccessors(bb)) + { + string label = getEdgeLabel((bb, tar)); + string postfix = string.IsNullOrEmpty(label) ? "" : $" [label=\"{label}\"]"; + sb.AppendLine($" N{bbToIndex[bb]} -> N{bbToIndex[tar]}{postfix};"); + } + } + + // Write ranks with BFS. 
+ List curRank = new List { startNode }; + HashSet seen = new HashSet(curRank); + while (curRank.Count > 0) + { + sb.AppendLine($" {{rank = same; {string.Concat(curRank.Select(bb => $"N{bbToIndex[bb]}; "))}}}"); + curRank = curRank.SelectMany(getSuccessors).Where(seen.Add).ToList(); + } + + sb.AppendLine("}"); + return sb.ToString(); + } + + public static string Dump( + this FlowGraph fg, + Func getNodeAnnot, + Func<(BasicBlock, BasicBlock), string> getEdgeAnnot) + { + string getBasicBlockLabel(BasicBlock bb) + { + string label = $"[{bb.Start:x}..{bb.Start + bb.Size:x})\\n{getNodeAnnot(bb)}"; + return label; + } + + return DumpGraph( + fg.BasicBlocks, + fg.BasicBlocks.Single(bb => bb.Start == 0), + bb => bb.Targets, + "", + getBasicBlockLabel, + getEdgeAnnot); + + } + } +} diff --git a/src/coreclr/tools/dotnet-pgo/SPGO/KeyValueMap.cs b/src/coreclr/tools/dotnet-pgo/SPGO/KeyValueMap.cs new file mode 100644 index 00000000000000..189559dcc63ccf --- /dev/null +++ b/src/coreclr/tools/dotnet-pgo/SPGO/KeyValueMap.cs @@ -0,0 +1,76 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Diagnostics; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using ILCompiler.Reflection.ReadyToRun; +using Microsoft.Diagnostics.Tracing.Parsers.Clr; + +namespace Microsoft.Diagnostics.Tools.Pgo +{ + /// + /// A simple key-value map that does not support updates and that supports range queries. + /// + /// The type of values. + public class KeyValueMap where TKey : IComparable + { + // Native offsets in order + private TKey[] _keys; + private TValue[] _values; + + public KeyValueMap(TKey[] keys, TValue[] values) + { + Trace.Assert(keys.Length == values.Length); + + _keys = keys; + _values = values; + } + + // Find last index of a key that is smaller than the specified input key. 
+ private int LookupIndex(TKey key) + { + int index = Array.BinarySearch(_keys, key); + if (index < 0) + index = ~index - 1; + + // If rva is before first binary search will return ~0 so index will be -1. + if (index < 0) + return -1; + + return index; + } + + public bool TryLookup(TKey key, out TValue value) + { + int index = LookupIndex(key); + if (index == -1) + { + value = default; + return false; + } + + value = _values[index]; + return true; + } + + public IEnumerable LookupRange(TKey min, TKey max) + { + Debug.Assert(min.CompareTo(max) <= 0); + + int start = LookupIndex(min); + if (start < 0) + start = 0; + + int end = LookupIndex(max); + if (end < 0) + yield break; + + for (int i = start; i <= end; i++) + yield return _values[i]; + } + } +} diff --git a/src/coreclr/tools/dotnet-pgo/SPGO/NativeToILMap.cs b/src/coreclr/tools/dotnet-pgo/SPGO/NativeToILMap.cs deleted file mode 100644 index a61c2484dac6db..00000000000000 --- a/src/coreclr/tools/dotnet-pgo/SPGO/NativeToILMap.cs +++ /dev/null @@ -1,80 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System; -using System.Collections.Generic; -using System.Linq; -using System.Text; -using System.Threading.Tasks; -using ILCompiler.Reflection.ReadyToRun; -using Microsoft.Diagnostics.Tracing.Parsers.Clr; - -namespace Microsoft.Diagnostics.Tools.Pgo -{ - public class NativeToILMap - { - // Native offsets in order - private uint[] _nativeOffsets; - // Map from native offset to IL offset - private int[] _ilOffsets; - - public NativeToILMap(uint[] nativeOffsets, int[] ilOffsets) - { - _nativeOffsets = nativeOffsets; - _ilOffsets = ilOffsets; - } - - private int LookupIndex(uint rva) - { - int index = Array.BinarySearch(_nativeOffsets, rva); - if (index < 0) - index = ~index - 1; - - // If rva is before first binary search will return ~0 so index will be -1. 
- if (index < 0) - return -1; - - return index; - } - - /// Look up IL offset associated with block that contains RVA. - public int Lookup(uint rva) - => LookupIndex(rva) switch - { - -1 => -1, - int index => _ilOffsets[index] - }; - - public IEnumerable LookupRange(uint rvaStart, uint rvaEnd) - { - int start = LookupIndex(rvaStart); - if (start < 0) - start = 0; - - int end = LookupIndex(rvaEnd); - if (end < 0) - yield break; - - for (int i = start; i <= end; i++) - yield return _ilOffsets[i]; - } - - internal static NativeToILMap FromR2RBounds(List boundsList) - { - List sorted = boundsList.OrderBy(e => e.NativeOffset).ToList(); - - return new NativeToILMap(sorted.Select(e => e.NativeOffset).ToArray(), sorted.Select(e => (int)e.ILOffset).ToArray()); - } - - internal static NativeToILMap FromEvent(MethodILToNativeMapTraceData ev) - { - List<(uint rva, int ilOffset)> pairs = new List<(uint rva, int ilOffset)>(ev.CountOfMapEntries); - for (int i = 0; i < ev.CountOfMapEntries; i++) - pairs.Add(((uint)ev.NativeOffset(i), ev.ILOffset(i))); - - pairs.RemoveAll(p => p.ilOffset < 0); - pairs.Sort((p1, p2) => p1.rva.CompareTo(p2.rva)); - return new NativeToILMap(pairs.Select(p => p.rva).ToArray(), pairs.Select(p => p.ilOffset).ToArray()); - } - } -} diff --git a/src/coreclr/tools/dotnet-pgo/SPGO/PreciseDebugInfo.cs b/src/coreclr/tools/dotnet-pgo/SPGO/PreciseDebugInfo.cs new file mode 100644 index 00000000000000..ac7dbf83e26a89 --- /dev/null +++ b/src/coreclr/tools/dotnet-pgo/SPGO/PreciseDebugInfo.cs @@ -0,0 +1,27 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace Microsoft.Diagnostics.Tools.Pgo +{ + internal record class PreciseDebugInfo( + ulong MethodID, + InlineContext InlineTree, + List Mappings); + + internal record class InlineContext( + uint Ordinal, + ulong MethodID, + string MethodName, + List Inlinees); + + internal record class PreciseIPMapping( + uint NativeOffset, + uint InlineContext, + uint ILOffset); +} diff --git a/src/coreclr/tools/dotnet-pgo/SPGO/SampleCorrelator.cs b/src/coreclr/tools/dotnet-pgo/SPGO/SampleCorrelator.cs new file mode 100644 index 00000000000000..5415f28910c87f --- /dev/null +++ b/src/coreclr/tools/dotnet-pgo/SPGO/SampleCorrelator.cs @@ -0,0 +1,278 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Internal.IL; +using Internal.TypeSystem; +using Internal.TypeSystem.Ecma; +using Microsoft.Diagnostics.Tracing.Parsers.Kernel; + +namespace Microsoft.Diagnostics.Tools.Pgo +{ + /// + /// A class that handles correlation IP samples/LBR samples back to managed methods. 
+ /// + internal class SampleCorrelator + { + private readonly Dictionary _methodInf = new Dictionary(); + + private readonly MethodMemoryMap _memMap; + + public SampleCorrelator(MethodMemoryMap memMap) + { + _memMap = memMap; + } + + public long SamplesOutsideManagedCode { get; private set; } + public long SamplesInManagedCodeWithoutAnyMappings { get; private set; } + public long SamplesInManagedCodeOutsideMappings { get; private set; } + public long SamplesInUnknownInlinees { get; private set; } + public long SamplesInManagedCodeWithoutIL { get; private set; } + public long SamplesInManagedCodeOutsideFlowGraph { get; private set; } + public long TotalAttributedSamples { get; private set; } + + private PerMethodInfo GetOrCreateInfo(MethodDesc md) + { + if (!_methodInf.TryGetValue(md, out PerMethodInfo pmi)) + { + MethodIL il = + md switch + { + EcmaMethod em => EcmaMethodIL.Create(em), + _ => new InstantiatedMethodIL(md, EcmaMethodIL.Create((EcmaMethod)md.GetTypicalMethodDefinition())), + }; + + if (il == null) + { + return null; + } + + _methodInf.Add(md, pmi = new PerMethodInfo()); + pmi.IL = il; + pmi.FlowGraph = FlowGraph.Create(il); + pmi.Profile = new SampleProfile(pmi.IL, pmi.FlowGraph); + } + + return pmi; + } + + public SampleProfile GetProfile(MethodDesc md) + => _methodInf.GetValueOrDefault(md)?.Profile; + + public void SmootheAllProfiles() + { + foreach (PerMethodInfo pmi in _methodInf.Values) + pmi.Profile.SmootheFlow(); + } + + public void AttributeSamplesToIP(ulong ip, long numSamples) + { + MemoryRegionInfo region = _memMap.GetInfo(ip); + if (region == null) + { + SamplesOutsideManagedCode += numSamples; + return; + } + + if (region.NativeToILMap == null) + { + SamplesInManagedCodeWithoutAnyMappings += numSamples; + return; + } + + if (!region.NativeToILMap.TryLookup(checked((uint)(ip - region.StartAddress)), out IPMapping mapping)) + { + SamplesInManagedCodeOutsideMappings += numSamples; + return; + } + + if (mapping.InlineeMethod == null) + { + 
SamplesInUnknownInlinees += numSamples; + return; + } + + PerMethodInfo pmi = GetOrCreateInfo(mapping.InlineeMethod); + if (pmi == null) + { + SamplesInManagedCodeWithoutIL += numSamples; + return; + } + + if (pmi.Profile.TryAttributeSamples(mapping.ILOffset, 1)) + { + TotalAttributedSamples += numSamples; + } + else + { + SamplesInManagedCodeOutsideFlowGraph += numSamples; + } + } + + private LbrEntry64[] _convertedEntries; + public void AttributeSampleToLbrRuns(Span lbr) + { + if (_convertedEntries == null || _convertedEntries.Length < lbr.Length) + { + Array.Resize(ref _convertedEntries, lbr.Length); + } + + Span convertedEntries = _convertedEntries[..lbr.Length]; + for (int i = 0; i < lbr.Length; i++) + { + ref LbrEntry64 entry = ref convertedEntries[i]; + entry.FromAddress = lbr[i].FromAddress; + entry.ToAddress = lbr[i].ToAddress; + entry.Reserved = lbr[i].Reserved; + } + + AttributeSampleToLbrRuns(convertedEntries); + } + + private readonly List<(BasicBlock, int)> _callStack = new(); + private readonly HashSet<(InlineContext, BasicBlock)> _seenOnRun = new(); + public void AttributeSampleToLbrRuns(Span lbr) + { + // LBR record represents branches taken by the CPU, in + // chronological order with most recent branches first. Using this + // data we can construct the 'runs' of instructions executed by the + // CPU. We attribute a sample to all basic blocks in each run. + // + // As an example, if we see a branch A -> B followed by a branch C -> D, + // we conclude that the CPU executed the instructions from B to C. + // + // Note that we need some special logic to handle calls. If we see + // a call A -> B followed by a return B -> A, a straightforward + // attribution process would attribute multiple samples to the + // block containing A. To deal with this we track in a list the + // basic blocks + offsets that we left, and if we see a return to + // the same basic block at a later offset, we skip that basic + // block. 
+ // Note that this is an approximation of the call stack as we + // cannot differentiate between tailcalls and calls, so there + // may be BBs we left in here that we never return to. + // Therefore we cannot just use a straightforward stack. + List<(BasicBlock, int)> callStack = _callStack; + callStack.Clear(); + + // On each run we translate the endpoint RVAs to all IL offset + // mappings we have for that range. It is possible (and happens + // often) that we see multiple IL offsets corresponding to the same + // basic block. + // + // Therefore, we keep track of the basic blocks we have seen in each + // run to make sure we only attribute once. However, due to inlinees + // we sometimes may want to attribute twice, for example if A is inlined in + // A(); A(); + // Therefore, we also key by the inline context. + HashSet<(InlineContext, BasicBlock)> seenOnRun = _seenOnRun; + + MethodMemoryMap memMap = _memMap; + + for (int i = lbr.Length - 2; i >= 0; i--) + { + ref LbrEntry64 prev = ref lbr[i + 1]; + ref LbrEntry64 cur = ref lbr[i]; + + MemoryRegionInfo prevToInf = memMap.GetInfo(prev.ToAddress); + MemoryRegionInfo curFromInf = memMap.GetInfo(cur.FromAddress); + + // If this run is not in the same function then ignore it. + // This probably means IP was changed out from beneath us while + // recording. + if (prevToInf == null || prevToInf != curFromInf) + continue; + + if (curFromInf.NativeToILMap == null) + continue; + + // Attribute samples to run. 
+ seenOnRun.Clear(); + uint rvaMin = checked((uint)(prev.ToAddress - prevToInf.StartAddress)); + uint rvaMax = checked((uint)(cur.FromAddress - curFromInf.StartAddress)); + int lastILOffs = -1; + BasicBlock lastBB = null; + bool isFirst = true; + foreach (IPMapping mapping in curFromInf.NativeToILMap.LookupRange(rvaMin, rvaMax)) + { + bool isFirstMapping = isFirst; + isFirst = false; + + if (mapping.InlineeMethod == null) + continue; + + PerMethodInfo pmi = GetOrCreateInfo(mapping.InlineeMethod); + if (pmi == null) + continue; + + BasicBlock bb = pmi.FlowGraph.Lookup(mapping.ILOffset); + if (bb == null) + continue; + + lastBB = bb; + lastILOffs = mapping.ILOffset; + + if (seenOnRun.Add((mapping.InlineContext, bb))) + { + if (isFirstMapping) + { + // This is the first mapping in the run. Check to + // see if we returned to this BB in the callstack, + // and if so, skip attributing anything to the + // first BB. + + bool skip = false; + + for (int j = callStack.Count - 1; j >= 0; j++) + { + (BasicBlock callFromBB, int callFromILOffs) = callStack[j]; + if (callFromBB == bb && mapping.ILOffset >= callFromILOffs) + { + // Yep, we previously left 'bb' at + // 'callFromILOffs', and now we are jumping + // back to the same BB at a later offset. + skip = true; + callStack.RemoveRange(j, callStack.Count - j); + break; + } + } + + if (skip) + continue; + } + + pmi.Profile.AttributeSamples(bb, 1); + } + } + + // Now see if this is a cross-function jump. + MemoryRegionInfo curToInf = memMap.GetInfo(cur.ToAddress); + // TODO: This check and above skipping logic does not handle recursion. + if (curFromInf != curToInf) + { + // Yes, either different managed function or not managed function (e.g. prestub). + // Record this. 
+ if (lastBB != null) + { + callStack.Add((lastBB, lastILOffs)); + } + } + } + } + + public override string ToString() => $"{TotalAttributedSamples} samples in {_methodInf.Count} methods"; + + private class PerMethodInfo + { + public MethodIL IL { get; set; } + public FlowGraph FlowGraph { get; set; } + public SampleProfile Profile { get; set; } + + public override string ToString() => IL.OwningMethod.ToString(); + } + } +} diff --git a/src/coreclr/tools/dotnet-pgo/SPGO/SampleProfile.cs b/src/coreclr/tools/dotnet-pgo/SPGO/SampleProfile.cs index f60753b0b571bd..856f5291888953 100644 --- a/src/coreclr/tools/dotnet-pgo/SPGO/SampleProfile.cs +++ b/src/coreclr/tools/dotnet-pgo/SPGO/SampleProfile.cs @@ -16,69 +16,56 @@ namespace Microsoft.Diagnostics.Tools.Pgo { internal class SampleProfile { + private Dictionary _rawSamples = new Dictionary(); + private Dictionary _smoothedSamples; + private Dictionary<(BasicBlock, BasicBlock), long> _smoothedEdgeSamples; + public SampleProfile( MethodIL methodIL, - FlowGraph fg, - Dictionary samples, - Dictionary smoothedSamples, - Dictionary<(BasicBlock, BasicBlock), long> smoothedEdgeSamples) + FlowGraph fg) { MethodIL = methodIL; FlowGraph = fg; - Samples = samples; - SmoothedSamples = smoothedSamples; - SmoothedEdgeSamples = smoothedEdgeSamples; } public MethodIL MethodIL { get; } public FlowGraph FlowGraph { get; } - public Dictionary Samples { get; } - public Dictionary SmoothedSamples { get; } - public Dictionary<(BasicBlock, BasicBlock), long> SmoothedEdgeSamples { get; } + public IReadOnlyDictionary RawSamples => _rawSamples; + public IReadOnlyDictionary SmoothedSamples => _smoothedSamples; + public IReadOnlyDictionary<(BasicBlock, BasicBlock), long> SmoothedEdgeSamples => _smoothedEdgeSamples; + public long AttributedSamples { get; set; } - /// - /// Given pairs of runs (as relative IPs in this function), create a sample profile. 
- /// - public static SampleProfile CreateFromLbr(MethodIL il, FlowGraph fg, NativeToILMap map, IEnumerable<(uint fromRva, uint toRva, long count)> runs) + public bool TryAttributeSamples(int ilOffset, long count) { - Dictionary bbSamples = fg.BasicBlocks.ToDictionary(bb => bb, bb => 0L); - foreach ((uint from, uint to, long count) in runs) - { - foreach (BasicBlock bb in map.LookupRange(from, to).Select(fg.Lookup).Distinct()) - { - if (bb != null) - bbSamples[bb] += count; - } - } + BasicBlock bb = FlowGraph.Lookup(ilOffset); + if (bb == null) + return false; - FlowSmoothing flowSmooth = new FlowSmoothing(bbSamples, fg.Lookup(0), bb => bb.Targets, (bb, isForward) => bb.Size * (isForward ? 1 : 50) + 2); - flowSmooth.Perform(); + AttributeSamples(bb, count); + return true; + } - return new SampleProfile(il, fg, bbSamples, flowSmooth.NodeResults, flowSmooth.EdgeResults); + public void AttributeSamples(BasicBlock bb, long count) + { + Debug.Assert(FlowGraph.Lookup(bb.Start) == bb); + CollectionsMarshal.GetValueRefOrAddDefault(_rawSamples, bb, out _) += count; + AttributedSamples += count; } - /// - /// Given some IL offset samples into a method, construct a profile. - /// - public static SampleProfile Create(MethodIL il, FlowGraph fg, IEnumerable ilOffsetSamples) + public void SmootheFlow() { - // Now associate raw IL-offset samples with basic blocks. - Dictionary bbSamples = fg.BasicBlocks.ToDictionary(bb => bb, bb => 0L); - foreach (int ofs in ilOffsetSamples) + foreach (BasicBlock bb in FlowGraph.BasicBlocks) { - if (ofs == -1) - continue; - - BasicBlock bb = fg.Lookup(ofs); - if (bb != null) - bbSamples[bb]++; + if (!_rawSamples.ContainsKey(bb)) + _rawSamples.Add(bb, 0); } - // Smooth the graph to produce something that satisfies flow conservation. - FlowSmoothing flowSmooth = new FlowSmoothing(bbSamples, fg.Lookup(0), bb => bb.Targets, (bb, isForward) => bb.Size * (isForward ? 
1 : 50) + 2); + FlowSmoothing flowSmooth = new(_rawSamples, FlowGraph.Lookup(0), bb => bb.Targets, (bb, isForward) => bb.Size * (isForward ? 1 : 50) + 2); flowSmooth.Perform(); - - return new SampleProfile(il, fg, bbSamples, flowSmooth.NodeResults, flowSmooth.EdgeResults); + _smoothedSamples = flowSmooth.NodeResults; + _smoothedEdgeSamples = flowSmooth.EdgeResults; } + + public override string ToString() => $"{AttributedSamples} samples"; } } From 5ab02ec6be1f783c95073d5a196ac0757ec8dd36 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 17 Nov 2021 15:59:13 +0100 Subject: [PATCH 2/6] Run jit-format --- src/coreclr/jit/codegencommon.cpp | 11 ++++------- src/coreclr/jit/compiler.h | 4 ++-- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index fecac126540c11..8966d74da862ca 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -10870,12 +10870,9 @@ void CodeGen::genDumpPreciseDebugInfo() first = false; - fprintf( - file, - "{\"NativeOffset\":%u,\"InlineContext\":%u,\"ILOffset\":%u}", - mapping->nativeLoc.CodeOffset(GetEmitter()), - mapping->debugInfo.GetInlineContext()->GetOrdinal(), - mapping->debugInfo.GetLocation().GetOffset()); + fprintf(file, "{\"NativeOffset\":%u,\"InlineContext\":%u,\"ILOffset\":%u}", + mapping->nativeLoc.CodeOffset(GetEmitter()), mapping->debugInfo.GetInlineContext()->GetOrdinal(), + mapping->debugInfo.GetLocation().GetOffset()); } fprintf(file, "]}\n"); @@ -10888,7 +10885,7 @@ void CodeGen::genDumpPreciseDebugInfo() void CodeGen::genAddPreciseIPMappingHere(const DebugInfo& di) { PreciseIPMapping* mapping = new (compiler, CMK_DebugInfo) PreciseIPMapping; - mapping->next = nullptr; + mapping->next = nullptr; mapping->nativeLoc.CaptureLocation(GetEmitter()); mapping->debugInfo = di; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 3055b4beb8de62..9ec7dc94179ea2 100644 --- 
a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2538,8 +2538,8 @@ struct IPmappingDsc struct PreciseIPMapping { PreciseIPMapping* next; - emitLocation nativeLoc; - DebugInfo debugInfo; + emitLocation nativeLoc; + DebugInfo debugInfo; }; /* From 4dc9fa2ab797fb6aa8c9997cffe0fd81cd834811 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 17 Nov 2021 17:32:51 +0100 Subject: [PATCH 3/6] Use better lock, include overlap in graph titles, minor other fixes --- src/coreclr/jit/codegencommon.cpp | 8 +--- .../tools/dotnet-pgo/MethodMemoryMap.cs | 6 +-- src/coreclr/tools/dotnet-pgo/Program.cs | 37 ++++++++++++++++--- .../tools/dotnet-pgo/SPGO/SampleCorrelator.cs | 2 +- 4 files changed, 37 insertions(+), 16 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 8966d74da862ca..e8d184aef83561 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -10843,10 +10843,8 @@ void CodeGen::genDumpPreciseDebugInfo() if (JitConfig.JitDumpPreciseDebugInfoFile() == nullptr) return; - static unsigned int s_flag; - - while (InterlockedCompareExchange(&s_flag, 1, 0) != 0) - System_YieldProcessor(); + static CritSecObject s_critSect; + CritSecHolder holder(s_critSect); FILE* file = _wfopen(JitConfig.JitDumpPreciseDebugInfoFile(), W("a")); if (file == nullptr) @@ -10878,8 +10876,6 @@ void CodeGen::genDumpPreciseDebugInfo() fprintf(file, "]}\n"); fclose(file); - - InterlockedCompareExchange(&s_flag, 0, 1); } void CodeGen::genAddPreciseIPMappingHere(const DebugInfo& di) diff --git a/src/coreclr/tools/dotnet-pgo/MethodMemoryMap.cs b/src/coreclr/tools/dotnet-pgo/MethodMemoryMap.cs index e502bff6f027fe..ed80098b9ecfc8 100644 --- a/src/coreclr/tools/dotnet-pgo/MethodMemoryMap.cs +++ b/src/coreclr/tools/dotnet-pgo/MethodMemoryMap.cs @@ -51,7 +51,7 @@ public MethodMemoryMap( TraceTypeSystemContext tsc, TraceRuntimeDescToTypeSystemDesc idParser, int clrInstanceID, - string preciseDebugInfoFile, 
+ FileInfo preciseDebugInfoFile, Logger logger) { // Capture the addresses of jitted code @@ -161,10 +161,10 @@ public MethodMemoryMap( } List preciseInfos = null; - if (File.Exists(preciseDebugInfoFile)) + if (preciseDebugInfoFile != null) { preciseInfos = - File.ReadAllLines(preciseDebugInfoFile) + File.ReadAllLines(preciseDebugInfoFile.FullName) .Select(l => JsonSerializer.Deserialize(l)) .ToList(); } diff --git a/src/coreclr/tools/dotnet-pgo/Program.cs b/src/coreclr/tools/dotnet-pgo/Program.cs index 5100bac27e6d71..6248c700c62285 100644 --- a/src/coreclr/tools/dotnet-pgo/Program.cs +++ b/src/coreclr/tools/dotnet-pgo/Program.cs @@ -623,21 +623,46 @@ string FormatDevirt(GetLikelyClassResult result) if (options.DumpWorstOverlapGraphsTo != null) { - IEnumerable<(MethodDesc Method, double Overlap)> toDump; + IEnumerable toDump; if (options.DumpWorstOverlapGraphs == -1) - toDump = blockOverlaps.OrderBy(t => t.Overlap).TakeWhile(t => t.Overlap < 0.5); + { + // Take all with less than 0.5 overlap in order. + toDump = + blockOverlaps + .Concat(edgeOverlaps) + .OrderBy(t => t.Overlap) + .TakeWhile(t => t.Overlap < 0.5) + .Select(t => t.Method) + .Distinct(); + } else - toDump = blockOverlaps.OrderBy(t => t.Overlap).Take(options.DumpWorstOverlapGraphs); + { + // Take the first N methods ordered by min(blockOverlap, edgeOverlap). 
+ toDump = + blockOverlaps + .Concat(edgeOverlaps) + .GroupBy(t => t.Method) + .Select(g => (Method: g.Key, Overlap: g.Select(t => t.Overlap).Min())) + .OrderBy(t => t.Overlap) + .Select(t => t.Method) + .Take(options.DumpWorstOverlapGraphs); + } - foreach ((MethodDesc method, double overlap) in toDump) + foreach (MethodDesc method in toDump) { PgoCompareMethodFlowGraph fg = fgMatches[method]; string title = $"Flowgraph for {method}\\n{name1} vs {name2}"; if (fg.ProfilesHadBasicBlocks) + { title += $"\\nBasic block counts: {fg.TotalBlockCount1} vs {fg.TotalEdgeCount2}"; + title += $"\\nBasic block count overlap: {fg.ComputeBlockOverlap() * 100:F2}%"; + } if (fg.ProfilesHadEdges) - title += $"\\Edge counts: {fg.TotalEdgeCount1} vs {fg.TotalEdgeCount2}"; + { + title += $"\\nEdge counts: {fg.TotalEdgeCount1} vs {fg.TotalEdgeCount2}"; + title += $"\\nEdge count overlap: {fg.ComputeEdgeOverlap() * 100:F2}%"; + } string dot = fg.Dump(title); @@ -1290,7 +1315,7 @@ MethodMemoryMap GetMethodMemMap() tsc, idParser, clrInstanceId.Value, - commandLineOptions.PreciseDebugInfoFile?.FullName, + commandLineOptions.PreciseDebugInfoFile, s_logger); } diff --git a/src/coreclr/tools/dotnet-pgo/SPGO/SampleCorrelator.cs b/src/coreclr/tools/dotnet-pgo/SPGO/SampleCorrelator.cs index 5415f28910c87f..d6cef34e31da53 100644 --- a/src/coreclr/tools/dotnet-pgo/SPGO/SampleCorrelator.cs +++ b/src/coreclr/tools/dotnet-pgo/SPGO/SampleCorrelator.cs @@ -14,7 +14,7 @@ namespace Microsoft.Diagnostics.Tools.Pgo { /// - /// A class that handles correlation IP samples/LBR samples back to managed methods. + /// A class that handles correlating IP samples/LBR samples back to managed methods. 
/// internal class SampleCorrelator { From 6e83c4260fcf31d6b02a3e6ada5919d248c34689 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 17 Nov 2021 17:49:27 +0100 Subject: [PATCH 4/6] Make sure graph file names are valid --- src/coreclr/tools/dotnet-pgo/Program.cs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/coreclr/tools/dotnet-pgo/Program.cs b/src/coreclr/tools/dotnet-pgo/Program.cs index 6248c700c62285..6c8439b42dd89d 100644 --- a/src/coreclr/tools/dotnet-pgo/Program.cs +++ b/src/coreclr/tools/dotnet-pgo/Program.cs @@ -667,6 +667,9 @@ string FormatDevirt(GetLikelyClassResult result) string dot = fg.Dump(title); string fileName = DebugNameFormatter.Instance.FormatName(method.OwningType, DebugNameFormatter.FormatOptions.NamespaceQualify) + "." + method.DiagnosticName; + foreach (char c in Path.GetInvalidFileNameChars()) + fileName = fileName.Replace(c, '_'); + File.WriteAllText(Path.Combine(options.DumpWorstOverlapGraphsTo.FullName, fileName + ".dot"), dot); } } From 5e1c7080fa9aa74c61682cddffdca102d1378e69 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 17 Nov 2021 17:52:48 +0100 Subject: [PATCH 5/6] Run jit-format --- src/coreclr/jit/codegencommon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index e8d184aef83561..4ad5ce8c482f14 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -10844,7 +10844,7 @@ void CodeGen::genDumpPreciseDebugInfo() return; static CritSecObject s_critSect; - CritSecHolder holder(s_critSect); + CritSecHolder holder(s_critSect); FILE* file = _wfopen(JitConfig.JitDumpPreciseDebugInfoFile(), W("a")); if (file == nullptr) From af605ad355f33a12c04b092b425a990e94d84934 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 17 Nov 2021 18:38:05 +0100 Subject: [PATCH 6/6] Rename Smoothe -> Smooth --- src/coreclr/tools/dotnet-pgo/Program.cs | 2 +- 
src/coreclr/tools/dotnet-pgo/SPGO/SampleCorrelator.cs | 4 ++-- src/coreclr/tools/dotnet-pgo/SPGO/SampleProfile.cs | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/tools/dotnet-pgo/Program.cs b/src/coreclr/tools/dotnet-pgo/Program.cs index 6c8439b42dd89d..b7534e900b6c89 100644 --- a/src/coreclr/tools/dotnet-pgo/Program.cs +++ b/src/coreclr/tools/dotnet-pgo/Program.cs @@ -1518,7 +1518,7 @@ void AddToInstrumentationData(int eventClrInstanceId, long methodID, int methodF PrintOutput($"Profile is based on {numLbrRecords} LBR records"); } - correlator.SmootheAllProfiles(); + correlator.SmoothAllProfiles(); } if (commandLineOptions.DisplayProcessedEvents) diff --git a/src/coreclr/tools/dotnet-pgo/SPGO/SampleCorrelator.cs b/src/coreclr/tools/dotnet-pgo/SPGO/SampleCorrelator.cs index d6cef34e31da53..4fd1047c6b8b46 100644 --- a/src/coreclr/tools/dotnet-pgo/SPGO/SampleCorrelator.cs +++ b/src/coreclr/tools/dotnet-pgo/SPGO/SampleCorrelator.cs @@ -63,10 +63,10 @@ private PerMethodInfo GetOrCreateInfo(MethodDesc md) public SampleProfile GetProfile(MethodDesc md) => _methodInf.GetValueOrDefault(md)?.Profile; - public void SmootheAllProfiles() + public void SmoothAllProfiles() { foreach (PerMethodInfo pmi in _methodInf.Values) - pmi.Profile.SmootheFlow(); + pmi.Profile.SmoothFlow(); } public void AttributeSamplesToIP(ulong ip, long numSamples) diff --git a/src/coreclr/tools/dotnet-pgo/SPGO/SampleProfile.cs b/src/coreclr/tools/dotnet-pgo/SPGO/SampleProfile.cs index 856f5291888953..d156cc19c49d7b 100644 --- a/src/coreclr/tools/dotnet-pgo/SPGO/SampleProfile.cs +++ b/src/coreclr/tools/dotnet-pgo/SPGO/SampleProfile.cs @@ -52,7 +52,7 @@ public void AttributeSamples(BasicBlock bb, long count) AttributedSamples += count; } - public void SmootheFlow() + public void SmoothFlow() { foreach (BasicBlock bb in FlowGraph.BasicBlocks) {