diff --git a/src/coreclr/scripts/superpmi.py b/src/coreclr/scripts/superpmi.py index 0e3c9900c9397a..1c7f2f22a76f42 100755 --- a/src/coreclr/scripts/superpmi.py +++ b/src/coreclr/scripts/superpmi.py @@ -497,9 +497,6 @@ def collect(self): # The base .MCH file path self.base_mch_file = None - # No dup .MCH file path - self.nodup_mch_file = None - # Final .MCH file path self.final_mch_file = None @@ -510,12 +507,11 @@ def collect(self): # Do a basic SuperPMI collect and validation: # 1. Collect MC files by running a set of sample apps. - # 2. Merge the MC files into a single MCH using "mcs -merge *.mc -recursive". - # 3. Create a thin unique MCH by using "mcs -removeDup -thin". - # 4. Create a clean MCH by running SuperPMI over the MCH, and using "mcs -strip" to filter + # 2. Create a merged thin unique MCH by using "mcs -merge -recursive -dedup -thin base.mch *.mc". + # 3. Create a clean MCH by running SuperPMI over the MCH, and using "mcs -strip" to filter # out any failures (if any). - # 5. Create a TOC using "mcs -toc". - # 6. Verify the resulting MCH file is error-free when running SuperPMI against it with the + # 4. Create a TOC using "mcs -toc". + # 5. Verify the resulting MCH file is error-free when running SuperPMI against it with the # same JIT used for collection. # # MCH files are big. If we don't need them anymore, clean them up right away to avoid @@ -528,7 +524,6 @@ def collect(self): # Setup all of the temp locations self.base_fail_mcl_file = os.path.join(temp_location, "basefail.mcl") self.base_mch_file = os.path.join(temp_location, "base.mch") - self.nodup_mch_file = os.path.join(temp_location, "nodup.mch") self.temp_location = temp_location @@ -563,7 +558,6 @@ def collect(self): self.__merge_mch_files__() if not self.coreclr_args.has_verified_clean_mch: - self.__create_thin_unique_mch__() self.__create_clean_mch_file__() self.__create_toc__() self.__verify_final_mch__() @@ -694,13 +688,13 @@ def __merge_mc_files__(self): """ Merge the mc files that were generated Notes: - mcs -merge \*.mc -recursive + mcs -merge \*.mc -recursive -dedup -thin """ pattern = os.path.join(self.temp_location, "*.mc") - command = [self.mcs_path, "-merge", self.base_mch_file, pattern, "-recursive"] + command = [self.mcs_path, "-merge", self.base_mch_file, pattern, "-recursive", "-dedup", "-thin"] print("Invoking: " + " ".join(command)) proc = subprocess.Popen(command) proc.communicate() @@ -732,51 +726,32 @@ def __merge_mch_files__(self): if not os.path.isfile(self.base_mch_file): raise RuntimeError("MCH file failed to be generated at: %s" % self.base_mch_file) - def __create_thin_unique_mch__(self): - """ Create a thin unique MCH - - Notes: - -removeDup -thin - """ - - command = [self.mcs_path, "-removeDup", "-thin", self.base_mch_file, self.nodup_mch_file] - print("Invoking: " + " ".join(command)) - proc = subprocess.Popen(command) - proc.communicate() - - if not os.path.isfile(self.nodup_mch_file): - raise RuntimeError("Error, no dup mch file not created correctly at: %s" % self.nodup_mch_file) - - if not self.coreclr_args.skip_cleanup: - os.remove(self.base_mch_file) - self.base_mch_file = None - def __create_clean_mch_file__(self): """ Create a clean mch file Notes: - -p -f + -p -f if is non-empty: - -strip + -strip else - # copy/move nodup file to final file + # copy/move base file to final file del """ - command = [self.superpmi_path, "-p", "-f", self.base_fail_mcl_file, self.nodup_mch_file, self.jit_path] + command = [self.superpmi_path, "-p", "-f", self.base_fail_mcl_file, 
self.base_mch_file, self.jit_path] print("Invoking: " + " ".join(command)) proc = subprocess.Popen(command) proc.communicate() if is_nonzero_length_file(self.base_fail_mcl_file): - command = [self.mcs_path, "-strip", self.base_fail_mcl_file, self.nodup_mch_file, self.final_mch_file] + command = [self.mcs_path, "-strip", self.base_fail_mcl_file, self.base_mch_file, self.final_mch_file] print("Invoking: " + " ".join(command)) proc = subprocess.Popen(command) proc.communicate() else: # Ideally we could just rename this file instead of copying it. - shutil.copy2(self.nodup_mch_file, self.final_mch_file) + shutil.copy2(self.base_mch_file, self.final_mch_file) if not os.path.isfile(self.final_mch_file): raise RuntimeError("Final mch file failed to be generated.") @@ -785,9 +760,9 @@ def __create_clean_mch_file__(self): if os.path.isfile(self.base_fail_mcl_file): os.remove(self.base_fail_mcl_file) self.base_fail_mcl_file = None - if os.path.isfile(self.nodup_mch_file): - os.remove(self.nodup_mch_file) - self.nodup_mch_file = None + if os.path.isfile(self.base_mch_file): + os.remove(self.base_mch_file) + self.base_mch_file = None def __create_toc__(self): """ Create a TOC file diff --git a/src/coreclr/src/ToolBox/superpmi/mcs/CMakeLists.txt b/src/coreclr/src/ToolBox/superpmi/mcs/CMakeLists.txt index b20ab76c77ec1f..7c82afea565fbb 100644 --- a/src/coreclr/src/ToolBox/superpmi/mcs/CMakeLists.txt +++ b/src/coreclr/src/ToolBox/superpmi/mcs/CMakeLists.txt @@ -14,6 +14,7 @@ include_directories(../superpmi-shared) set(MCS_SOURCES commandline.cpp mcs.cpp + removedup.cpp verbasmdump.cpp verbconcat.cpp verbdump.cpp @@ -31,6 +32,7 @@ set(MCS_SOURCES ../superpmi-shared/callutils.cpp ../superpmi-shared/compileresult.cpp ../superpmi-shared/errorhandling.cpp + ../superpmi-shared/hash.cpp ../superpmi-shared/logging.cpp ../superpmi-shared/mclist.cpp ../superpmi-shared/methodcontext.cpp diff --git a/src/coreclr/src/ToolBox/superpmi/mcs/commandline.cpp b/src/coreclr/src/ToolBox/superpmi/mcs/commandline.cpp index a6e74a9934a780..30fff554ca69c9 100644 --- a/src/coreclr/src/ToolBox/superpmi/mcs/commandline.cpp +++ b/src/coreclr/src/ToolBox/superpmi/mcs/commandline.cpp @@ -80,17 +80,21 @@ void CommandLine::DumpHelp(const char* program) printf(" Check the integrity of each methodContext\n"); printf(" e.g. -integ a.mc\n"); printf("\n"); - printf(" -merge outputfile pattern\n"); + printf(" -merge outputfile pattern [-dedup [-thin]]\n"); printf(" Merge all the input files matching the pattern.\n"); + printf(" With -dedup, skip duplicates when copying (like using -removeDup). With -thin, also strip CompileResults.\n"); printf(" e.g. -merge a.mch *.mc\n"); printf(" e.g. -merge a.mch c:\\foo\\bar\\*.mc\n"); printf(" e.g. -merge a.mch relpath\\*.mc\n"); printf(" e.g. -merge a.mch .\n"); printf(" e.g. -merge a.mch onedir\n"); + printf(" e.g. -merge a.mch *.mc -dedup -thin\n"); printf("\n"); - printf(" -merge outputfile pattern -recursive\n"); + printf(" -merge outputfile pattern -recursive [-dedup [-thin]]\n"); printf(" Merge all the input files matching the pattern, in the specified and all child directories.\n"); + printf(" With -dedup, skip duplicates when copying (like using -removeDup). With -thin, also strip CompileResults.\n"); printf(" e.g. -merge a.mch *.mc -recursive\n"); + printf(" e.g. 
-merge a.mch *.mc -recursive -dedup -thin\n"); printf("\n"); printf(" -removeDup inputfile outputfile\n"); printf(" Copy methodContexts from inputfile to outputfile, skipping duplicates.\n"); @@ -246,6 +250,11 @@ bool CommandLine::Parse(int argc, char* argv[], /* OUT */ Options* o) tempLen = strlen(argv[i]); o->recursive = true; } + else if ((_strnicmp(&argv[i][1], "dedup", argLen) == 0)) + { + tempLen = strlen(argv[i]); + o->dedup = true; + } else if ((_strnicmp(&argv[i][1], "toc", argLen) == 0)) { tempLen = strlen(argv[i]); @@ -390,6 +399,38 @@ bool CommandLine::Parse(int argc, char* argv[], /* OUT */ Options* o) } } + if (o->dedup) + { + if (!o->actionMerge) + { + LogError("CommandLine::Parse() '-dedup' requires -merge."); + DumpHelp(argv[0]); + return false; + } + } + + if (o->stripCR) + { + if (o->actionMerge) + { + if (!o->dedup) + { + LogError("CommandLine::Parse() '-thin' in '-merge' requires -dedup."); + DumpHelp(argv[0]); + return false; + } + } + else if (o->actionRemoveDup || o->actionStrip || o->actionFracture || o->actionCopy) + { + } + else + { + LogError("CommandLine::Parse() '-thin' requires -merge, -removeDup, -strip, -fracture, or -copy."); + DumpHelp(argv[0]); + return false; + } + } + if (o->actionASMDump) { if ((!foundFile1) || (!foundFile2)) diff --git a/src/coreclr/src/ToolBox/superpmi/mcs/commandline.h b/src/coreclr/src/ToolBox/superpmi/mcs/commandline.h index 4e594934d253f3..4b18ae9eb156f5 100644 --- a/src/coreclr/src/ToolBox/superpmi/mcs/commandline.h +++ b/src/coreclr/src/ToolBox/superpmi/mcs/commandline.h @@ -32,6 +32,7 @@ class CommandLine , actionTOC(false) , legacyCompare(false) , recursive(false) + , dedup(false) , stripCR(false) , nameOfFile1(nullptr) , nameOfFile2(nullptr) @@ -57,6 +58,7 @@ class CommandLine bool actionTOC; bool legacyCompare; bool recursive; + bool dedup; bool stripCR; char* nameOfFile1; char* nameOfFile2; diff --git a/src/coreclr/src/ToolBox/superpmi/mcs/mcs.cpp b/src/coreclr/src/ToolBox/superpmi/mcs/mcs.cpp index c1815341e55fe7..3c4e2dfb0d7bd2 100644 --- a/src/coreclr/src/ToolBox/superpmi/mcs/mcs.cpp +++ b/src/coreclr/src/ToolBox/superpmi/mcs/mcs.cpp @@ -51,7 +51,7 @@ int __cdecl main(int argc, char* argv[]) } if (o.actionMerge) { - exitCode = verbMerge::DoWork(o.nameOfFile1, o.nameOfFile2, o.recursive); + exitCode = verbMerge::DoWork(o.nameOfFile1, o.nameOfFile2, o.recursive, o.dedup, o.stripCR); } if (o.actionCopy) { diff --git a/src/coreclr/src/ToolBox/superpmi/mcs/removedup.cpp b/src/coreclr/src/ToolBox/superpmi/mcs/removedup.cpp new file mode 100644 index 00000000000000..b2fa9a202d9088 --- /dev/null +++ b/src/coreclr/src/ToolBox/superpmi/mcs/removedup.cpp @@ -0,0 +1,169 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. 
+// + +#include "standardpch.h" +#include "verbremovedup.h" +#include "simpletimer.h" +#include "lightweightmap.h" +#include "methodcontext.h" +#include "methodcontextiterator.h" +#include "removedup.h" + +RemoveDup::~RemoveDup() +{ + if (m_cleanup) + { + if (m_inFile != nullptr) + { + for (int i = 0; i < (int)m_inFile->GetCount(); i++) + { + DenseLightWeightMap* md5HashMap = m_inFile->GetItem(i); + if (md5HashMap != nullptr) + { + // go through and delete items + for (int j = 0; j < (int)md5HashMap->GetCount(); j++) + { + char* p = md5HashMap->GetItem(j); + delete[] p; + } + delete md5HashMap; + } + } + delete m_inFile; + m_inFile = nullptr; + } + if (m_inFileLegacy != nullptr) + { + for (int i = 0; i < (int)m_inFileLegacy->GetCount(); i++) + { + DenseLightWeightMap* md5HashMap = m_inFileLegacy->GetItem(i); + if (md5HashMap != nullptr) + { + // go through and delete items + for (int j = 0; j < (int)md5HashMap->GetCount(); j++) + { + MethodContext* p = md5HashMap->GetItem(j); + delete p; + } + delete md5HashMap; + } + } + delete m_inFileLegacy; + m_inFileLegacy = nullptr; + } + } +} + +bool RemoveDup::unique(MethodContext* mc) +{ + if (m_inFile == nullptr) + m_inFile = new LightWeightMap*>(); + + CORINFO_METHOD_INFO newInfo; + unsigned newFlags = 0; + mc->repCompileMethod(&newInfo, &newFlags); + + // Assume that there are lots of duplicates, so don't allocate a new buffer for the MD5 hash data + // until we know we're going to add it to the map. + char md5Buff[MD5_HASH_BUFFER_SIZE]; + mc->dumpMethodMD5HashToBuffer(md5Buff, MD5_HASH_BUFFER_SIZE, /* ignoreMethodName */ true, &newInfo, newFlags); + + if (m_inFile->GetIndex(newInfo.ILCodeSize) == -1) + m_inFile->Add(newInfo.ILCodeSize, new DenseLightWeightMap()); + + DenseLightWeightMap* ourRank = m_inFile->Get(newInfo.ILCodeSize); + + for (unsigned i = 0; i < ourRank->GetCount(); i++) + { + char* md5Buff2 = ourRank->Get(i); + if (strncmp(md5Buff, md5Buff2, MD5_HASH_BUFFER_SIZE) == 0) + { + return false; + } + } + + char* newmd5Buff = new char[MD5_HASH_BUFFER_SIZE]; + memcpy(newmd5Buff, md5Buff, MD5_HASH_BUFFER_SIZE); + ourRank->Append(newmd5Buff); + return true; +} + +bool RemoveDup::uniqueLegacy(MethodContext* mc) +{ + if (m_inFileLegacy == nullptr) + m_inFileLegacy = new LightWeightMap*>(); + + CORINFO_METHOD_INFO newInfo; + unsigned newFlags = 0; + mc->repCompileMethod(&newInfo, &newFlags); + + if (m_inFileLegacy->GetIndex(newInfo.ILCodeSize) == -1) + m_inFileLegacy->Add(newInfo.ILCodeSize, new DenseLightWeightMap()); + + DenseLightWeightMap* ourRank = m_inFileLegacy->Get(newInfo.ILCodeSize); + + for (unsigned i = 0; i < ourRank->GetCount(); i++) + { + MethodContext* scratch = ourRank->Get(i); + if (mc->Equal(scratch)) + { + return false; + } + } + + // We store the MethodContext in our map. + ourRank->Append(mc); + return true; +} + +bool RemoveDup::CopyAndRemoveDups(const char* nameOfInput, HANDLE hFileOut) +{ + MethodContextIterator mci(/* progressReport */ true); + if (!mci.Initialize(nameOfInput)) + return false; + + int savedCount = 0; + + while (mci.MoveNext()) + { + MethodContext* mc = mci.CurrentTakeOwnership(); + if (m_stripCR) + { + delete mc->cr; + mc->cr = new CompileResult(); + } + if (m_legacyCompare) + { + if (uniqueLegacy(mc)) + { + mc->saveToFile(hFileOut); + savedCount++; + + // In this case, for the legacy comparer, it has placed the 'mc' in the 'm_inFileLegacy' table, so we + // can't delete it. 
+ } + else + { + delete mc; // we no longer need this + } + } + else + { + if (unique(mc)) + { + mc->saveToFile(hFileOut); + savedCount++; + } + delete mc; // we no longer need this + } + } + + LogInfo("Loaded %d, Saved %d", mci.MethodContextNumber(), savedCount); + + if (!mci.Destroy()) + return false; + + return true; +} diff --git a/src/coreclr/src/ToolBox/superpmi/mcs/removedup.h b/src/coreclr/src/ToolBox/superpmi/mcs/removedup.h new file mode 100644 index 00000000000000..9933826e1a09b1 --- /dev/null +++ b/src/coreclr/src/ToolBox/superpmi/mcs/removedup.h @@ -0,0 +1,63 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +//---------------------------------------------------------- +// RemoveDup.h - Functions to remove dups from a method context hive (MCH) +//---------------------------------------------------------- +#ifndef _RemoveDup +#define _RemoveDup + +#include "methodcontext.h" +#include "lightweightmap.h" + +class RemoveDup +{ +public: + + RemoveDup() + : m_stripCR(false) + , m_legacyCompare(false) + , m_cleanup(false) + , m_inFile(nullptr) + , m_inFileLegacy(nullptr) + {} + + bool Initialize(bool stripCR = false, bool legacyCompare = false, bool cleanup = true) + { + m_stripCR = stripCR; + m_legacyCompare = legacyCompare; + m_cleanup = cleanup; + m_inFile = nullptr; + m_inFileLegacy = nullptr; + + return true; + } + + ~RemoveDup(); + + bool CopyAndRemoveDups(const char* nameOfInput, HANDLE hFileOut); + +private: + + bool m_stripCR; // 'true' if we remove CompileResults when removing duplicates. + bool m_legacyCompare; // 'true' to use the legacy comparer. + + // If false, we don't spend time cleaning up the `m_inFile` and `m_inFileLegacy` + // data structures. Only set it to `false` if you're ok with memory leaks, e.g., + // if the process will exit soon afterwards. + bool m_cleanup; + + // We use a hash to limit the number of comparisons we need to do. + // The first level key to our hash map is ILCodeSize and the second + // level map key is just an index and the value is an existing MC Hash. + + LightWeightMap*>* m_inFile; + LightWeightMap*>* m_inFileLegacy; + + bool unique(MethodContext* mc); + bool uniqueLegacy(MethodContext* mc); +}; + +#endif diff --git a/src/coreclr/src/ToolBox/superpmi/mcs/verbmerge.cpp b/src/coreclr/src/ToolBox/superpmi/mcs/verbmerge.cpp index c1cb225485a7f8..618a418a1e9434 100644 --- a/src/coreclr/src/ToolBox/superpmi/mcs/verbmerge.cpp +++ b/src/coreclr/src/ToolBox/superpmi/mcs/verbmerge.cpp @@ -11,6 +11,9 @@ // Do reads/writes in large 256MB chunks. #define BUFFER_SIZE 0x10000000 +// static member +RemoveDup verbMerge::m_removeDups; + // MergePathStrings: take two file system path components, compose them together, and return the merged pathname string. // The caller must delete the returned string with delete[]. // @@ -36,20 +39,21 @@ char* verbMerge::ConvertWideCharToMultiByte(LPCWSTR wstr) return encodedStr; } -// AppendFile: append the file named by 'fileName' to the output file referred to by 'hFileOut'. The 'hFileOut' +// AppendFileRaw: append the file named by 'fileFullPath' to the output file referred to by 'hFileOut'. The 'hFileOut' // handle is assumed to be open, and the file position is assumed to be at the correct spot for writing, to append. +// The file is simply appended. // // 'buffer' is memory that can be used to do reading/buffering. 
// // static -int verbMerge::AppendFile(HANDLE hFileOut, LPCWSTR fileName, unsigned char* buffer, size_t bufferSize) +int verbMerge::AppendFileRaw(HANDLE hFileOut, LPCWSTR fileFullPath, unsigned char* buffer, size_t bufferSize) { int result = 0; // default to zero == success - char* fileNameAsChar = ConvertWideCharToMultiByte(fileName); + char* fileNameAsChar = ConvertWideCharToMultiByte(fileFullPath); LogInfo("Appending file '%s'", fileNameAsChar); - HANDLE hFileIn = CreateFileW(fileName, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, + HANDLE hFileIn = CreateFileW(fileFullPath, GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_SEQUENTIAL_SCAN, NULL); if (hFileIn == INVALID_HANDLE_VALUE) { @@ -106,6 +110,37 @@ int verbMerge::AppendFile(HANDLE hFileOut, LPCWSTR fileName, unsigned char* buff return result; } +// AppendFile: append the file named by 'fileFullPath' to the output file referred to by 'hFileOut'. The 'hFileOut' +// handle is assumed to be open, and the file position is assumed to be at the correct spot for writing, to append. +// +// 'buffer' is memory that can be used to do reading/buffering. +// +// static +int verbMerge::AppendFile(HANDLE hFileOut, LPCWSTR fileFullPath, bool dedup, unsigned char* buffer, size_t bufferSize) +{ + int result = 0; // default to zero == success + + if (dedup) + { + // Need to conver the fileFullPath to non-Unicode. + char* fileFullPathAsChar = ConvertWideCharToMultiByte(fileFullPath); + LogInfo("Appending file '%s'", fileFullPathAsChar); + bool ok = m_removeDups.CopyAndRemoveDups(fileFullPathAsChar, hFileOut); + delete[] fileFullPathAsChar; + if (!ok) + { + LogError("Failed to remove dups"); + return -1; + } + } + else + { + result = AppendFileRaw(hFileOut, fileFullPath, buffer, bufferSize); + } + + return result; +} + // Return true if this is a directory // // static @@ -294,6 +329,7 @@ int verbMerge::AppendAllInDir(HANDLE hFileOut, unsigned char* buffer, size_t bufferSize, bool recursive, + bool dedup, /* out */ LONGLONG* size) { int result = 0; // default to zero == success @@ -316,7 +352,11 @@ int verbMerge::AppendAllInDir(HANDLE hFileOut, if (wcslen(fileFullPath) > MAX_PATH) // This path is too long, use \\?\ to access it. { - assert(wcscmp(dir, W(".")) != 0 && "can't access the relative path with UNC"); + if (wcscmp(dir, W(".")) == 0) + { + LogError("can't access the relative path with UNC"); + goto CLEAN_UP; + } LPWSTR newBuffer = new WCHAR[wcslen(fileFullPath) + 30]; wcscpy(newBuffer, W("\\\\?\\")); if (*fileFullPath == '\\') // It is UNC path, use \\?\UNC\serverName to access it. @@ -347,7 +387,7 @@ int verbMerge::AppendAllInDir(HANDLE hFileOut, } else { - result = AppendFile(hFileOut, fileFullPath, buffer, bufferSize); + result = AppendFile(hFileOut, fileFullPath, dedup, buffer, bufferSize); if (result != 0) { // Error was already logged. @@ -381,7 +421,7 @@ int verbMerge::AppendAllInDir(HANDLE hFileOut, const _WIN32_FIND_DATAW& findData = fileArray[i]; LPWSTR fileFullPath = MergePathStrings(dir, findData.cFileName); - result = AppendAllInDir(hFileOut, fileFullPath, file, buffer, bufferSize, recursive, &dirSize); + result = AppendAllInDir(hFileOut, fileFullPath, file, buffer, bufferSize, recursive, dedup, &dirSize); delete[] fileFullPath; if (result != 0) { @@ -416,14 +456,27 @@ int verbMerge::AppendAllInDir(HANDLE hFileOut, // If "recursive" is true, then the pattern is searched for in the specified directory (or implicit current directory) // and all sub-directories, recursively. 
// +// If "dedup" is true, the we remove duplicates while we are merging. If "stripCR" is also true, we remove CompileResults +// while deduplicating. +// // static -int verbMerge::DoWork(const char* nameOfOutputFile, const char* pattern, bool recursive) +int verbMerge::DoWork(const char* nameOfOutputFile, const char* pattern, bool recursive, bool dedup, bool stripCR) { int result = 0; // default to zero == success SimpleTimer st1; LogInfo("Merging files matching '%s' into '%s'", pattern, nameOfOutputFile); + if (dedup) + { + // Initialize the deduplication object + if (!m_removeDups.Initialize(stripCR, /* legacyCompare */ false, /* cleanup */ false)) + { + LogError("Failed to initialize the deduplicator"); + return -1; + } + } + int nameLength = (int)strlen(nameOfOutputFile) + 1; LPWSTR nameOfOutputFileAsWchar = new WCHAR[nameLength]; MultiByteToWideChar(CP_ACP, 0, nameOfOutputFile, -1, nameOfOutputFileAsWchar, nameLength); @@ -491,7 +544,7 @@ int verbMerge::DoWork(const char* nameOfOutputFile, const char* pattern, bool re st1.Start(); - result = AppendAllInDir(hFileOut, dir, file, buffer, BUFFER_SIZE, recursive, &dirSize); + result = AppendAllInDir(hFileOut, dir, file, buffer, BUFFER_SIZE, recursive, dedup, &dirSize); if (result != 0) { goto CLEAN_UP; diff --git a/src/coreclr/src/ToolBox/superpmi/mcs/verbmerge.h b/src/coreclr/src/ToolBox/superpmi/mcs/verbmerge.h index 923c51393d846e..b89eec4e76bafd 100644 --- a/src/coreclr/src/ToolBox/superpmi/mcs/verbmerge.h +++ b/src/coreclr/src/ToolBox/superpmi/mcs/verbmerge.h @@ -10,10 +10,12 @@ #ifndef _verbMerge #define _verbMerge +#include "removedup.h" + class verbMerge { public: - static int DoWork(const char* nameOfOutputFile, const char* pattern, bool recursive); + static int DoWork(const char* nameOfOutputFile, const char* pattern, bool recursive, bool dedup, bool stripCR); private: typedef bool (*DirectoryFilterFunction_t)(WIN32_FIND_DATAW*); @@ -29,13 +31,17 @@ class verbMerge static char* ConvertWideCharToMultiByte(LPCWSTR wstr); - static int AppendFile(HANDLE hFileOut, LPCWSTR fileName, unsigned char* buffer, size_t bufferSize); + static int AppendFileRaw(HANDLE hFileOut, LPCWSTR fileFullPath, unsigned char* buffer, size_t bufferSize); + static int AppendFile(HANDLE hFileOut, LPCWSTR fileFullPath, bool dedup, unsigned char* buffer, size_t bufferSize); static int AppendAllInDir(HANDLE hFileOut, LPCWSTR dir, LPCWSTR file, unsigned char* buffer, size_t bufferSize, bool recursive, + bool dedup, /* out */ LONGLONG* size); + + static RemoveDup m_removeDups; }; #endif diff --git a/src/coreclr/src/ToolBox/superpmi/mcs/verbremovedup.cpp b/src/coreclr/src/ToolBox/superpmi/mcs/verbremovedup.cpp index 10a4e6c21f6007..6fb10a6ee281b6 100644 --- a/src/coreclr/src/ToolBox/superpmi/mcs/verbremovedup.cpp +++ b/src/coreclr/src/ToolBox/superpmi/mcs/verbremovedup.cpp @@ -5,89 +5,12 @@ #include "standardpch.h" #include "verbremovedup.h" -#include "simpletimer.h" -#include "lightweightmap.h" -#include "methodcontext.h" -#include "methodcontextiterator.h" - -// We use a hash to limit the number of comparisons we need to do. -// The first level key to our hash map is ILCodeSize and the second -// level map key is just an index and the value is an existing MC Hash. 
- -LightWeightMap*>* inFile = nullptr; - -bool unique(MethodContext* mc) -{ - if (inFile == nullptr) - inFile = new LightWeightMap*>(); - - CORINFO_METHOD_INFO newInfo; - unsigned newFlags = 0; - mc->repCompileMethod(&newInfo, &newFlags); - - char* md5Buff = new char[MD5_HASH_BUFFER_SIZE]; - mc->dumpMethodMD5HashToBuffer(md5Buff, MD5_HASH_BUFFER_SIZE, /* ignoreMethodName */ true); - - if (inFile->GetIndex(newInfo.ILCodeSize) == -1) - inFile->Add(newInfo.ILCodeSize, new DenseLightWeightMap()); - - DenseLightWeightMap* ourRank = inFile->Get(newInfo.ILCodeSize); - - for (int i = 0; i < (int)ourRank->GetCount(); i++) - { - char* md5Buff2 = ourRank->Get(i); - - if (strncmp(md5Buff, md5Buff2, MD5_HASH_BUFFER_SIZE) == 0) - { - delete[] md5Buff; - return false; - } - } - - ourRank->Append(md5Buff); - return true; -} - -LightWeightMap*>* inFileLegacy = nullptr; - -bool uniqueLegacy(MethodContext* mc) -{ - if (inFileLegacy == nullptr) - inFileLegacy = new LightWeightMap*>(); - - CORINFO_METHOD_INFO newInfo; - unsigned newFlags = 0; - mc->repCompileMethod(&newInfo, &newFlags); - - if (inFileLegacy->GetIndex(newInfo.ILCodeSize) == -1) - inFileLegacy->Add(newInfo.ILCodeSize, new DenseLightWeightMap()); - - DenseLightWeightMap* ourRank = inFileLegacy->Get(newInfo.ILCodeSize); - - for (int i = 0; i < (int)ourRank->GetCount(); i++) - { - MethodContext* scratch = ourRank->Get(i); - if (mc->Equal(scratch)) - { - return false; - } - } - - // We store the MethodContext in our map. - ourRank->Append(mc); - return true; -} +#include "removedup.h" int verbRemoveDup::DoWork(const char* nameOfInput, const char* nameOfOutput, bool stripCR, bool legacyCompare) { LogVerbose("Removing duplicates from '%s', writing to '%s'", nameOfInput, nameOfOutput); - MethodContextIterator mci(true); - if (!mci.Initialize(nameOfInput)) - return -1; - - int savedCount = 0; - HANDLE hFileOut = CreateFileA(nameOfOutput, GENERIC_WRITE, 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL | FILE_FLAG_SEQUENTIAL_SCAN, NULL); if (hFileOut == INVALID_HANDLE_VALUE) @@ -96,52 +19,19 @@ int verbRemoveDup::DoWork(const char* nameOfInput, const char* nameOfOutput, boo return -1; } - while (mci.MoveNext()) + RemoveDup removeDups; + if (!removeDups.Initialize(stripCR, legacyCompare, /* cleanup */ false) + || !removeDups.CopyAndRemoveDups(nameOfInput, hFileOut)) { - MethodContext* mc = mci.CurrentTakeOwnership(); - if (stripCR) - { - delete mc->cr; - mc->cr = new CompileResult(); - } - if (legacyCompare) - { - if (uniqueLegacy(mc)) - { - mc->saveToFile(hFileOut); - savedCount++; - - // In this case, for the legacy comparer, it has placed the 'mc' in the 'inFileLegacy' table, so we - // can't delete it. - } - else - { - delete mc; // we no longer need this - } - } - else - { - if (unique(mc)) - { - mc->saveToFile(hFileOut); - savedCount++; - } - delete mc; // we no longer need this - } + LogError("Failed to remove dups"); + return -1; } - // We're leaking 'inFile' or 'inFileLegacy', but the process is going away, so it shouldn't matter. - if (CloseHandle(hFileOut) == 0) { - LogError("2nd CloseHandle failed. GetLastError()=%u", GetLastError()); + LogError("CloseHandle failed. 
GetLastError()=%u", GetLastError()); return -1; } - LogInfo("Loaded %d, Saved %d", mci.MethodContextNumber(), savedCount); - - if (!mci.Destroy()) - return -1; - return 0; } diff --git a/src/coreclr/src/ToolBox/superpmi/readme.md b/src/coreclr/src/ToolBox/superpmi/readme.md index bc8ffa31436d75..7b6be5693b9302 100644 --- a/src/coreclr/src/ToolBox/superpmi/readme.md +++ b/src/coreclr/src/ToolBox/superpmi/readme.md @@ -103,7 +103,7 @@ replay. (MCH stands for "method context hive".) equivalent, such as trivial class constructors, or if some functions are compiled multiple times in different scenarios or tests. We filter out the duplicates, which makes playback much faster, and the resultant MCH file much -smaller. +smaller. This can be done as part of the "merge" step. 4. Create a "clean" .MCH with no SuperPMI failures. The original collected MCH file might not replay cleanly. This is generally due to existing, un-investigated SuperPMI bugs or limitations. We don't want to see these during normal playback, @@ -166,7 +166,7 @@ directory (specified by `SuperPMIShimLogPath`). Merge the generated .MC files using the `mcs` tool: ``` -mcs -merge base.mch *.mc -recursive +mcs -merge base.mch *.mc -recursive -dedup -thin ``` This assumes the current directory is the root directory where the .MC files @@ -174,14 +174,18 @@ were placed, namely the directory specified as `SuperPMIShimLogPath` above. You can also specify a directory prefix to the file system regular expression. The `-recursive` flag is only necessary if .MC files also exist in subdirectories of this, and you want those also added to the resultant, collected `base.mch` -file. So, for the example above, you might use: +file. The `-dedup` and `-thin` options remove duplicates as the files are merged, +and remove the normally-unused CompileResults (e.g., the code generated during +the initial collection). + +So, for the example above, you might use: ``` -f:\gh\runtime\artifacts\bin\coreclr\Windows_NT.x64.Checked\mcs.exe -merge f:\spmi\base.mch f:\spmi\temp\*.mc -recursive +f:\gh\runtime\artifacts\bin\coreclr\Windows_NT.x64.Checked\mcs.exe -merge f:\spmi\base.mch f:\spmi\temp\*.mc -recursive -dedup -thin ``` -Note that `mcs -merge` is literally just a file concatenation of many files into -one, so this step will double the required disk space. +Note that `mcs -merge` without `-dedup -thin` is literally just a file concatenation +of many files into one, which would double the required disk space. After this step, you can remove all the individual .MC files unless you want to keep them to debug the SuperPMI collection process itself. @@ -190,7 +194,8 @@ to keep them to debug the SuperPMI collection process itself. ## Remove duplicates in the .MCH file One benefit of SuperPMI is the ability to remove duplicated compilations, so -on replay only unique functions are compiled. Use the following to create a +on replay only unique functions are compiled. If you didn't use the `-merge -dedup -thin` +option above, you can do the deduplication separately, using the following to create a "unique" set of functions: ``` @@ -218,6 +223,9 @@ As stated above, due to various bugs or otherwise uninvestigated issues, a Super replay of the unique.mch file might contain errors. We don't want that, so we filter out those errors in a "baseline" run, as follows. +(Note that if you are following the steps above, you most likely deduplicated +during the merge operation, so you have a `base.mch` file, not a `unique.mch` file.) 
+ ``` superpmi -p -f basefail.mcl unique.mch clrjit.dll mcs.exe -strip basefail.mcl unique.mch final.mch diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/hash.cpp b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/hash.cpp new file mode 100644 index 00000000000000..bfe5d64bcf1ad3 --- /dev/null +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/hash.cpp @@ -0,0 +1,199 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. +// + +//---------------------------------------------------------- +// hash.cpp - Class for hashing a text stream using MD5 hashing +// +// Note that on Windows, acquiring the Crypto hash provider is expensive, so +// only do that once and cache it. +//---------------------------------------------------------- + +#include "standardpch.h" +#include "runtimedetails.h" +#include "errorhandling.h" +#include "md5.h" +#include "hash.h" + +Hash::Hash() +#ifndef TARGET_UNIX + : m_Initialized(false) + , m_hCryptProv(NULL) +#endif // !TARGET_UNIX +{ +} + +Hash::~Hash() +{ + Destroy(); // Ignoring return code. +} + +// static +bool Hash::Initialize() +{ +#ifdef TARGET_UNIX + + // No initialization necessary. + return true; + +#else // !TARGET_UNIX + + if (m_Initialized) + { + LogError("Hash class has already been initialized"); + return false; + } + + // Get handle to the crypto provider + if (!CryptAcquireContextA(&m_hCryptProv, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) + goto OnError; + + m_Initialized = true; + return true; + +OnError: + LogError("Failed to create a hash using the Crypto API (Error 0x%X)", GetLastError()); + + if (m_hCryptProv != NULL) + CryptReleaseContext(m_hCryptProv, 0); + + m_Initialized = false; + return false; + +#endif // !TARGET_UNIX +} + +// static +bool Hash::Destroy() +{ +#ifdef TARGET_UNIX + + // No destruction necessary. + return true; + +#else // !TARGET_UNIX + + // Should probably check Crypt() function return codes. + if (m_hCryptProv != NULL) + { + CryptReleaseContext(m_hCryptProv, 0); + m_hCryptProv = NULL; + } + + m_Initialized = false; + return true; + +#endif // !TARGET_UNIX +} + +// Hash::WriteHashValueAsText - Take a binary hash value in the array of bytes pointed to by +// 'pHash' (size in bytes 'cbHash'), and write an ASCII hexadecimal representation of it in the buffer +// 'hashTextBuffer' (size in bytes 'hashTextBufferLen'). +// +// Returns true on success, false on failure (only if the arguments are bad). +bool Hash::WriteHashValueAsText(const BYTE* pHash, size_t cbHash, char* hashTextBuffer, size_t hashTextBufferLen) +{ + // This could be: + // + // for (DWORD i = 0; i < MD5_HASH_BYTE_SIZE; i++) + // { + // sprintf_s(hash + i * 2, hashLen - i * 2, "%02X", bHash[i]); + // } + // + // But this function is hot, and sprintf_s is too slow. This is a specialized function to speed it up. 
+ + if (hashTextBufferLen < 2 * cbHash + 1) // 2 characters for each byte, plus null terminator + { + LogError("WriteHashValueAsText doesn't have enough space to write the output"); + return false; + } + + static const char hexDigits[] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + char* pCur = hashTextBuffer; + for (size_t i = 0; i < cbHash; i++) + { + unsigned digit = pHash[i]; + unsigned lowNibble = digit & 0xF; + unsigned highNibble = digit >> 4; + *pCur++ = hexDigits[highNibble]; + *pCur++ = hexDigits[lowNibble]; + } + *pCur++ = '\0'; + return true; +} + +// Hash::HashBuffer - Compute an MD5 hash of the data pointed to by 'pBuffer', of 'bufLen' bytes, +// writing the hexadecimal ASCII text representation of the hash to the buffer pointed to by 'hash', +// of 'hashLen' bytes in size, which must be at least MD5_HASH_BUFFER_SIZE bytes. +// +// Returns the number of bytes written, or -1 on error. +int Hash::HashBuffer(BYTE* pBuffer, size_t bufLen, char* hash, size_t hashLen) +{ +#ifdef TARGET_UNIX + + MD5HASHDATA md5_hashdata; + MD5 md5_hasher; + + if (hashLen < MD5_HASH_BUFFER_SIZE) + return -1; + + md5_hasher.Hash(pBuffer, (ULONG)bufLen, &md5_hashdata); + + DWORD md5_hashdata_size = sizeof(md5_hashdata.rgb) / sizeof(BYTE); + Assert(md5_hashdata_size == MD5_HASH_BYTE_SIZE); + + if (!WriteHashValueAsText(md5_hashdata.rgb, md5_hashdata_size, hash, hashLen)) + return -1; + + return MD5_HASH_BUFFER_SIZE; // if we had success we wrote MD5_HASH_BUFFER_SIZE bytes to the buffer + +#else // !TARGET_UNIX + + if (!m_Initialized) + { + LogError("Hash class not initialized"); + return -1; + } + + HCRYPTHASH hCryptHash; + BYTE bHash[MD5_HASH_BYTE_SIZE]; + DWORD cbHash = MD5_HASH_BYTE_SIZE; + + if (hashLen < MD5_HASH_BUFFER_SIZE) + return -1; + + if (!CryptCreateHash(m_hCryptProv, CALG_MD5, 0, 0, &hCryptHash)) + goto OnError; + + if (!CryptHashData(hCryptHash, pBuffer, (DWORD)bufLen, 0)) + goto OnError; + + if (!CryptGetHashParam(hCryptHash, HP_HASHVAL, bHash, &cbHash, 0)) + goto OnError; + + if (cbHash != MD5_HASH_BYTE_SIZE) + goto OnError; + + if (!WriteHashValueAsText(bHash, cbHash, hash, hashLen)) + return -1; + + // Clean up. + CryptDestroyHash(hCryptHash); + hCryptHash = NULL; + + return MD5_HASH_BUFFER_SIZE; // if we had success we wrote MD5_HASH_BUFFER_SIZE bytes to the buffer + +OnError: + LogError("Failed to create a hash using the Crypto API (Error 0x%X)", GetLastError()); + + if (hCryptHash != NULL) + { + CryptDestroyHash(hCryptHash); + hCryptHash = NULL; + } + + return -1; + +#endif // !TARGET_UNIX +} diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/hash.h b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/hash.h new file mode 100644 index 00000000000000..8db50cdcaa8d9c --- /dev/null +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/hash.h @@ -0,0 +1,47 @@ +// +// Copyright (c) Microsoft. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. 
+// + +//---------------------------------------------------------- +// hash.h - Class for hashing a text stream using MD5 hashing +//---------------------------------------------------------- +#ifndef _hash +#define _hash + +#define MD5_HASH_BYTE_SIZE 16 // MD5 is 128-bit, so we need 16 bytes to store it +#define MD5_HASH_BUFFER_SIZE 33 // MD5 is 128-bit, so we need 32 chars + 1 char to store null-terminator + +class Hash +{ +public: + + Hash(); + ~Hash(); + + bool Initialize(); + bool Destroy(); + + bool IsInitialized() + { +#ifdef TARGET_UNIX + return true; // No initialization necessary. +#else // TARGET_UNIX + return m_Initialized; +#endif // !TARGET_UNIX + + } + + int HashBuffer(BYTE* pBuffer, size_t bufLen, char* hash, size_t hashLen); + +private: + + bool WriteHashValueAsText(const BYTE* pHash, size_t cbHash, char* hashTextBuffer, size_t hashTextBufferLen); + +#ifndef TARGET_UNIX + bool m_Initialized; + HCRYPTPROV m_hCryptProv; +#endif // !TARGET_UNIX +}; + +#endif diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.cpp b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.cpp index c4a9f4745cd13c..8eb53dcca7e7e7 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.cpp +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.cpp @@ -9,7 +9,6 @@ //---------------------------------------------------------- #include "standardpch.h" -#include "md5.h" #include "methodcontext.h" #include "compileresult.h" #include "lightweightmap.h" @@ -36,6 +35,9 @@ #define DEBUG_REP(x) #endif +// static variable initialization +Hash MethodContext::m_hash; + MethodContext::MethodContext() { methodSize = 0; @@ -288,7 +290,7 @@ void MethodContext::MethodInitHelper(unsigned char* buff2, unsigned int totalLen { mcPackets packetType = (mcPackets)buff2[buffIndex++]; memcpy(&localsize, &buff2[buffIndex], sizeof(unsigned int)); - buffIndex += 4; + buffIndex += sizeof(unsigned int); switch (packetType) { @@ -6195,44 +6197,56 @@ const WCHAR* MethodContext::repGetStringConfigValue(const WCHAR* name) return value; } -int MethodContext::dumpMethodIdentityInfoToBuffer(char* buff, int len, bool ignoreMethodName /* = false */) +int MethodContext::dumpMethodIdentityInfoToBuffer(char* buff, int len, bool ignoreMethodName /* = false */, CORINFO_METHOD_INFO* optInfo /* = nullptr */, unsigned optFlags /* = 0 */) { - char* obuff = buff; - if (len < METHOD_IDENTITY_INFO_SIZE) return -1; // Obtain the Method Info structure for this method - CORINFO_METHOD_INFO info; - unsigned flags = 0; + CORINFO_METHOD_INFO info; + CORINFO_METHOD_INFO* pInfo = nullptr; + unsigned flags = 0; + + if (optInfo != nullptr) + { + // Use the info we've already retrieved from repCompileMethod(). 
+ pInfo = optInfo; + flags = optFlags; + } + else + { + repCompileMethod(&info, &flags); + pInfo = &info; + } - repCompileMethod(&info, &flags); + char* obuff = buff; // Add the Method Signature - int t = sprintf_s(buff, len, "%s -- ", CallUtils::GetMethodFullName(this, info.ftn, info.args, ignoreMethodName)); + int t = sprintf_s(buff, len, "%s -- ", CallUtils::GetMethodFullName(this, pInfo->ftn, pInfo->args, ignoreMethodName)); buff += t; len -= t; // Add Calling convention information, CorInfoOptions and CorInfoRegionKind - t = sprintf_s(buff, len, "CallingConvention: %d, CorInfoOptions: %d, CorInfoRegionKind: %d ", info.args.callConv, - info.options, info.regionKind); + t = sprintf_s(buff, len, "CallingConvention: %d, CorInfoOptions: %d, CorInfoRegionKind: %d ", pInfo->args.callConv, + pInfo->options, pInfo->regionKind); buff += t; len -= t; // Hash the IL Code for this method and append it to the ID info char ilHash[MD5_HASH_BUFFER_SIZE]; - dumpMD5HashToBuffer(info.ILCode, info.ILCodeSize, ilHash, MD5_HASH_BUFFER_SIZE); + dumpMD5HashToBuffer(pInfo->ILCode, pInfo->ILCodeSize, ilHash, MD5_HASH_BUFFER_SIZE); t = sprintf_s(buff, len, "ILCode Hash: %s", ilHash); buff += t; len -= t; return (int)(buff - obuff); } -int MethodContext::dumpMethodMD5HashToBuffer(char* buff, int len, bool ignoreMethodName /* = false */) + +int MethodContext::dumpMethodMD5HashToBuffer(char* buff, int len, bool ignoreMethodName /* = false */, CORINFO_METHOD_INFO* optInfo /* = nullptr */, unsigned optFlags /* = 0 */) { char bufferIdentityInfo[METHOD_IDENTITY_INFO_SIZE]; - int cbLen = dumpMethodIdentityInfoToBuffer(bufferIdentityInfo, METHOD_IDENTITY_INFO_SIZE, ignoreMethodName); + int cbLen = dumpMethodIdentityInfoToBuffer(bufferIdentityInfo, METHOD_IDENTITY_INFO_SIZE, ignoreMethodName, optInfo, optFlags); if (cbLen < 0) return cbLen; @@ -6244,74 +6258,17 @@ int MethodContext::dumpMethodMD5HashToBuffer(char* buff, int len, bool ignoreMet int MethodContext::dumpMD5HashToBuffer(BYTE* pBuffer, int bufLen, char* hash, int hashLen) { -#ifdef TARGET_UNIX - - MD5HASHDATA md5_hashdata; - MD5 md5_hasher; - - if (hashLen < MD5_HASH_BUFFER_SIZE) - return -1; - - md5_hasher.Hash(pBuffer, (ULONG)bufLen, &md5_hashdata); - - DWORD md5_hashdata_size = sizeof(md5_hashdata.rgb) / sizeof(BYTE); - Assert(md5_hashdata_size == MD5_HASH_BYTE_SIZE); - - for (DWORD i = 0; i < md5_hashdata_size; i++) - { - sprintf_s(hash + i * 2, hashLen - i * 2, "%02X", md5_hashdata.rgb[i]); - } - - return MD5_HASH_BUFFER_SIZE; // if we had success we wrote MD5_HASH_BUFFER_SIZE bytes to the buffer - -#else // !TARGET_UNIX - - HCRYPTPROV hProv = NULL; // CryptoProvider - HCRYPTHASH hHash = NULL; - BYTE bHash[MD5_HASH_BYTE_SIZE]; - DWORD cbHash = MD5_HASH_BYTE_SIZE; - - if (hashLen < MD5_HASH_BUFFER_SIZE) - return -1; - - // Get handle to the crypto provider - if (!CryptAcquireContextA(&hProv, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) - goto OnError; - - if (!CryptCreateHash(hProv, CALG_MD5, 0, 0, &hHash)) - goto OnError; - - if (!CryptHashData(hHash, pBuffer, bufLen, 0)) - goto OnError; - - if (!CryptGetHashParam(hHash, HP_HASHVAL, bHash, &cbHash, 0)) - goto OnError; - - if (cbHash != MD5_HASH_BYTE_SIZE) - goto OnError; - - for (DWORD i = 0; i < MD5_HASH_BYTE_SIZE; i++) + // Lazy initialize the MD5 hasher. 
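+    // Acquiring the Windows Crypto hash provider is expensive (see the note in hash.cpp), so the
+    // static m_hash instance is set up once, on first use, and then reused for every later hash.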
+ if (!m_hash.IsInitialized()) { - sprintf_s(hash + i * 2, hashLen - i * 2, "%02X", bHash[i]); + if (!m_hash.Initialize()) + { + AssertMsg(false, "Failed to initialize the MD5 hasher"); + return -1; + } } - if (hHash != NULL) - CryptDestroyHash(hHash); - if (hProv != NULL) - CryptReleaseContext(hProv, 0); - - return MD5_HASH_BUFFER_SIZE; // if we had success we wrote MD5_HASH_BUFFER_SIZE bytes to the buffer - -OnError: - AssertMsg(false, "Failed to create a hash using the Crypto API (Error %X)", GetLastError()); - - if (hHash != NULL) - CryptDestroyHash(hHash); - if (hProv != NULL) - CryptReleaseContext(hProv, 0); - return -1; - -#endif // !TARGET_UNIX + return m_hash.HashBuffer(pBuffer, bufLen, hash, hashLen); } MethodContext::Environment MethodContext::cloneEnvironment() diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.h b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.h index 5e2454d60affee..35e12b485ee674 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.h +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontext.h @@ -14,12 +14,10 @@ #include "compileresult.h" #include "lightweightmap.h" #include "errorhandling.h" +#include "hash.h" #define METHOD_IDENTITY_INFO_SIZE 0x10000 // We assume that the METHOD_IDENTITY_INFO_SIZE will not exceed 64KB -#define MD5_HASH_BYTE_SIZE 16 // MD5 is 128-bit, so we need 16 bytes to store it -#define MD5_HASH_BUFFER_SIZE 33 // MD5 is 128-bit, so we need 32 chars + 1 char to store null-terminator - class MethodContext { public: @@ -583,8 +581,8 @@ class MethodContext static int dumpStatTitleToBuffer(char* buff, int len); int methodSize; - int dumpMethodIdentityInfoToBuffer(char* buff, int len, bool ignoreMethodName = false); - int dumpMethodMD5HashToBuffer(char* buff, int len, bool ignoreMethodName = false); + int dumpMethodIdentityInfoToBuffer(char* buff, int len, bool ignoreMethodName = false, CORINFO_METHOD_INFO* optInfo = nullptr, unsigned optFlags = 0); + int dumpMethodMD5HashToBuffer(char* buff, int len, bool ignoreMethodName = false, CORINFO_METHOD_INFO* optInfo = nullptr, unsigned optFlags = 0); void recGlobalContext(const MethodContext& other); @@ -1315,6 +1313,9 @@ class MethodContext #define LWM(map, key, value) LightWeightMap* map; #define DENSELWM(map, value) DenseLightWeightMap* map; #include "lwmlist.h" + + // MD5 hasher + static Hash m_hash; }; // ********************* Please keep this up-to-date to ease adding more *************** diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontextiterator.cpp b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontextiterator.cpp index 0483ac18b3d239..3cae7c7db93275 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontextiterator.cpp +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontextiterator.cpp @@ -92,11 +92,11 @@ bool MethodContextIterator::MoveNext() if (m_progressReport) { - if (m_methodContextNumber % 500 == 0) + if ((m_methodContextNumber % m_progressRate) == 0) { m_timer->Stop(); LogVerbose("Loaded %d at %d per second", m_methodContextNumber, - (int)((double)500 / m_timer->GetSeconds())); + (int)((double)m_progressRate / m_timer->GetSeconds())); m_timer->Start(); } } diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontextiterator.h b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontextiterator.h index 857f268072f97a..5049bf41ce001b 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontextiterator.h +++ 
b/src/coreclr/src/ToolBox/superpmi/superpmi-shared/methodcontextiterator.h @@ -19,6 +19,7 @@ class MethodContextIterator , m_index(0) , m_indexes(nullptr) , m_progressReport(progressReport) + , m_progressRate(1000) , m_timer(nullptr) { if (m_progressReport) @@ -36,6 +37,7 @@ class MethodContextIterator , m_index(0) , m_indexes(indexes) , m_progressReport(progressReport) + , m_progressRate(1000) , m_timer(nullptr) { if (m_progressReport) @@ -98,5 +100,6 @@ class MethodContextIterator // Should we log a progress report as we are loading the method contexts? // The timer is only used when m_progressReport==true. bool m_progressReport; + const int m_progressRate; // Report progress every `m_progressRate` method contexts. SimpleTimer* m_timer; -}; \ No newline at end of file +}; diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shim-collector/CMakeLists.txt b/src/coreclr/src/ToolBox/superpmi/superpmi-shim-collector/CMakeLists.txt index f67eedda48b694..5c7310f4e91e81 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi-shim-collector/CMakeLists.txt +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shim-collector/CMakeLists.txt @@ -22,6 +22,7 @@ set(SUPERPMI_SHIM_COLLECTOR_SOURCES ../superpmi-shared/callutils.cpp ../superpmi-shared/compileresult.cpp ../superpmi-shared/errorhandling.cpp + ../superpmi-shared/hash.cpp ../superpmi-shared/logging.cpp ../superpmi-shared/mclist.cpp ../superpmi-shared/methodcontext.cpp diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shim-counter/CMakeLists.txt b/src/coreclr/src/ToolBox/superpmi/superpmi-shim-counter/CMakeLists.txt index 65ed573279f93a..d3d9a527867787 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi-shim-counter/CMakeLists.txt +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shim-counter/CMakeLists.txt @@ -23,6 +23,7 @@ set(SUPERPMI_SHIM_COUNTER_SOURCES ../superpmi-shared/callutils.cpp ../superpmi-shared/compileresult.cpp ../superpmi-shared/errorhandling.cpp + ../superpmi-shared/hash.cpp ../superpmi-shared/logging.cpp ../superpmi-shared/mclist.cpp ../superpmi-shared/methodcontext.cpp diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi-shim-simple/CMakeLists.txt b/src/coreclr/src/ToolBox/superpmi/superpmi-shim-simple/CMakeLists.txt index aaa41d43760171..cb4caded38d0dd 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi-shim-simple/CMakeLists.txt +++ b/src/coreclr/src/ToolBox/superpmi/superpmi-shim-simple/CMakeLists.txt @@ -22,6 +22,7 @@ set(SUPERPMI_SHIM_SIMPLE_SOURCES ../superpmi-shared/callutils.cpp ../superpmi-shared/compileresult.cpp ../superpmi-shared/errorhandling.cpp + ../superpmi-shared/hash.cpp ../superpmi-shared/logging.cpp ../superpmi-shared/mclist.cpp ../superpmi-shared/methodcontext.cpp diff --git a/src/coreclr/src/ToolBox/superpmi/superpmi/CMakeLists.txt b/src/coreclr/src/ToolBox/superpmi/superpmi/CMakeLists.txt index 86992d950c06de..adc3cde6a17882 100644 --- a/src/coreclr/src/ToolBox/superpmi/superpmi/CMakeLists.txt +++ b/src/coreclr/src/ToolBox/superpmi/superpmi/CMakeLists.txt @@ -29,6 +29,7 @@ set(SUPERPMI_SOURCES ../superpmi-shared/callutils.cpp ../superpmi-shared/compileresult.cpp ../superpmi-shared/errorhandling.cpp + ../superpmi-shared/hash.cpp ../superpmi-shared/logging.cpp ../superpmi-shared/mclist.cpp ../superpmi-shared/methodcontext.cpp diff --git a/src/coreclr/tests/src/JIT/superpmi/superpmicollect.cs b/src/coreclr/tests/src/JIT/superpmi/superpmicollect.cs index fddd4d93468445..0798e7bf740e16 100644 --- a/src/coreclr/tests/src/JIT/superpmi/superpmicollect.cs +++ 
b/src/coreclr/tests/src/JIT/superpmi/superpmicollect.cs @@ -106,7 +106,6 @@ internal class SuperPMICollectionClass private static string s_baseFailMclFile = null; // Pathname for a temporary .MCL file used for noticing superpmi replay failures against preliminary MCH. private static string s_finalFailMclFile = null; // Pathname for a temporary .MCL file used for noticing superpmi replay failures against final MCH. private static string s_baseMchFile = null; // The base .MCH file path - private static string s_nodupMchFile = null; // The nodup .MCH file path private static string s_finalMchFile = null; // The clean thin unique .MCH file path private static string s_tocFile = null; // The .TOC file path for the clean thin unique .MCH file private static string s_errors = ""; // Collect non-fatal file delete errors to display at the end of the collection process. @@ -146,7 +145,6 @@ private static void ChooseFilePaths(string outputMchPath) s_baseFailMclFile = Path.Combine(s_tempDir, "basefail.mcl"); s_finalFailMclFile = Path.Combine(s_tempDir, "finalfail.mcl"); s_baseMchFile = Path.Combine(s_tempDir, "base.mch"); - s_nodupMchFile = Path.Combine(s_tempDir, "nodup.mch"); if (outputMchPath == null) { @@ -341,11 +339,11 @@ private static void CollectMCFiles(string runProgramPath, string runProgramArgum } // Merge MC files: - // mcs -merge \*.mc -recursive + // mcs -merge \*.mc -recursive -dedup -thin private static void MergeMCFiles() { string pattern = Path.Combine(s_tempDir, "*.mc"); - RunProgram(Global.McsPath, "-merge " + s_baseMchFile + " " + pattern + " -recursive"); + RunProgram(Global.McsPath, "-merge " + s_baseMchFile + " " + pattern + " -recursive -dedup -thin"); if (!File.Exists(s_baseMchFile)) { throw new SpmiException("file missing: " + s_baseMchFile); @@ -362,54 +360,32 @@ private static void MergeMCFiles() } } - // Create a thin unique MCH: - // -removeDup -thin - private static void CreateThinUniqueMCH() - { - RunProgram(Global.McsPath, "-removeDup -thin " + s_baseMchFile + " " + s_nodupMchFile); - - if (!File.Exists(s_nodupMchFile)) - { - throw new SpmiException("file missing: " + s_nodupMchFile); - } - - if (!Global.SkipCleanup) - { - // The base file is no longer used; delete it. - if (File.Exists(s_baseMchFile)) - { - SafeFileDelete(s_baseMchFile); - s_baseMchFile = null; - } - } - } - // Create clean MCH file: - // -p -f + // -p -f // if is non-empty: - // -strip + // -strip // else: - // move s_nodupMchFile to s_finalMchFile + // move s_baseMchFile to s_finalMchFile // del private static void CreateCleanMCHFile() { - RunProgram(Global.SuperPmiPath, "-p -f " + s_baseFailMclFile + " " + s_nodupMchFile + " " + Global.JitPath); + RunProgram(Global.SuperPmiPath, "-p -f " + s_baseFailMclFile + " " + s_baseMchFile + " " + Global.JitPath); if (File.Exists(s_baseFailMclFile) && !String.IsNullOrEmpty(File.ReadAllText(s_baseFailMclFile))) { - RunProgram(Global.McsPath, "-strip " + s_nodupMchFile + " " + s_finalMchFile); + RunProgram(Global.McsPath, "-strip " + s_baseMchFile + " " + s_finalMchFile); } else { try { - Console.WriteLine("Moving {0} to {1}", s_nodupMchFile, s_finalMchFile); - File.Move(s_nodupMchFile, s_finalMchFile, overwrite:true); - s_nodupMchFile = null; // This file no longer exists. + Console.WriteLine("Moving {0} to {1}", s_baseMchFile, s_finalMchFile); + File.Move(s_baseMchFile, s_finalMchFile, overwrite:true); + s_baseMchFile = null; // This file no longer exists. 
} catch (Exception ex) { - string err = string.Format("Error moving file \"{0}\" to \"{1}\": {2}", s_nodupMchFile, s_finalMchFile, ex.Message); + string err = string.Format("Error moving file \"{0}\" to \"{1}\": {2}", s_baseMchFile, s_finalMchFile, ex.Message); s_errors += err + System.Environment.NewLine; Console.Error.WriteLine(err); } @@ -428,11 +404,11 @@ private static void CreateCleanMCHFile() s_baseFailMclFile = null; } - // The nodup file is no longer used. - if ((s_nodupMchFile != null) && File.Exists(s_nodupMchFile)) + // The base file is no longer used. + if ((s_baseMchFile != null) && File.Exists(s_baseMchFile)) { - SafeFileDelete(s_nodupMchFile); - s_nodupMchFile = null; + SafeFileDelete(s_baseMchFile); + s_baseMchFile = null; } } } @@ -476,7 +452,6 @@ private static void VerifyFinalMCH() // Cleanup. If we get here due to a failure of some kind, we want to do full cleanup. If we get here as part // of normal shutdown processing, we want to keep the s_finalMchFile and s_tocFile if s_saveFinalMchFile == true. // del - // del // del // del // rmdir @@ -498,11 +473,6 @@ private static void Cleanup() SafeFileDelete(s_baseMchFile); s_baseMchFile = null; } - if ((s_nodupMchFile != null) && File.Exists(s_nodupMchFile)) - { - SafeFileDelete(s_nodupMchFile); - s_nodupMchFile = null; - } if (!s_saveFinalMchFile) { @@ -542,12 +512,11 @@ public static int Collect(string outputMchPath, string runProgramPath, string ru { // Do a basic SuperPMI collect and validation: // 1. Collect MC files by running a set of sample apps. - // 2. Merge the MC files into a single MCH using "mcs -merge *.mc -recursive". - // 3. Create a thin unique MCH by using "mcs -removeDup -thin". - // 4. Create a clean MCH by running superpmi over the MCH, and using "mcs -strip" to filter + // 2. Merge the MC files into a single MCH using "mcs -merge *.mc -recursive -dedup -thin". + // 3. Create a clean MCH by running superpmi over the MCH, and using "mcs -strip" to filter // out any failures (if any). - // 5. Create a TOC using "mcs -toc". - // 6. Verify the resulting MCH file is error-free when running superpmi against it with the + // 4. Create a TOC using "mcs -toc". + // 5. Verify the resulting MCH file is error-free when running superpmi against it with the // same JIT used for collection. // // MCH files are big. If we don't need them anymore, clean them up right away to avoid @@ -564,7 +533,6 @@ public static int Collect(string outputMchPath, string runProgramPath, string ru ChooseFilePaths(outputMchPath); CollectMCFiles(runProgramPath, runProgramArguments); MergeMCFiles(); - CreateThinUniqueMCH(); CreateCleanMCHFile(); CreateTOC(); VerifyFinalMCH();