From ee856ff4d24ae868a567ec148be33800b1f0efe0 Mon Sep 17 00:00:00 2001 From: Alexander Penev Date: Sat, 28 Dec 2019 10:06:33 +0200 Subject: [PATCH] [cling] Improve symbol resolution. This patch consolidates the symbol resolution facilities throughout TCling into a new singleton class Dyld, part of cling's DynamicLibraryManager. The new dyld is responsible for: * Symlink resolution -- it implements a memory-efficient representation of the full path to shared objects, allowing search in constant time O(1). This also fixes issues when resolving symbols on OSX, where the system libraries contain multiple levels of symlinks. * Bloom filter optimization -- it uses a stochastic data structure which gives a definitive answer if a symbol is not in the set. The implementation checks the .gnu.hash section in ELF, which is the GNU implementation of a bloom filter, and uses it. If the symbol is not in the bloom filter, the implementation builds its own and uses it. The measured performance of the bloom filter is a 30% speed-up for 2 MB more memory. The custom bloom filter on top of the .gnu.hash filter gives 1-2% better performance. The advantage of the custom bloom filter is that it works on all implementations which do not support .gnu.hash (Windows and OSX). It is also customizable if we want to further reduce the false positive rate (currently at p=2%). * Hash table optimization -- we build a hash table which contains all symbols for a given library. This allows us to avoid the fallback symbol iteration if multiple symbols from the same library are requested. The hash table optimization targets the case where the bloom filter tells us the symbol is *maybe* in the library. Patch by Alexander Penev and me! 
--- README/CREDITS | 5 + core/metacling/src/TCling.cxx | 357 +------ core/metacling/src/TClingCallbacks.cxx | 105 +- .../cling/Interpreter/DynamicLibraryManager.h | 44 +- .../cling/lib/Interpreter/CMakeLists.txt | 1 + .../DynamicLibraryManagerSymbol.cpp | 961 ++++++++++++++++++ 6 files changed, 1051 insertions(+), 422 deletions(-) create mode 100644 interpreter/cling/lib/Interpreter/DynamicLibraryManagerSymbol.cpp diff --git a/README/CREDITS b/README/CREDITS index fc6242da0ce0d..d5a9cc5ea63c3 100644 --- a/README/CREDITS +++ b/README/CREDITS @@ -776,6 +776,11 @@ N: Iulia Pasov E: iulia.pasov@gmail.com D: prototyping JavaScript graphics with d3.js +N: Alexander Penev +E: alexander_penev@yahoo.com +D: Dyld symbol resolution facilities in cling and TCling +D: runtime c++ modules on osx + N: Marc Paterno E: paterno@fnal.gov D: implement utility functions used by TGraphAsymmErrors::BayesDivide diff --git a/core/metacling/src/TCling.cxx b/core/metacling/src/TCling.cxx index 5fffaea73dd81..a8bcca6811b9f 100644 --- a/core/metacling/src/TCling.cxx +++ b/core/metacling/src/TCling.cxx @@ -653,11 +653,6 @@ extern "C" void TCling__SplitAclicMode(const char* fileName, string &mode, io = aclicio.Data(); fname = f.Data(); } -// Implemented in TClingCallbacks. -extern "C" void TCling__FindLoadedLibraries(std::vector> &sLibraries, - std::vector &sPaths, - cling::Interpreter &interpreter, bool searchSystem); - //______________________________________________________________________________ // // @@ -1531,9 +1526,16 @@ TCling::TCling(const char *name, const char *title, const char* const argv[]) fInterpreter->setCallbacks(std::move(clingCallbacks)); if (!fromRootCling) { + cling::DynamicLibraryManager& DLM = *fInterpreter->getDynamicLibraryManager(); // Make sure cling looks into ROOT's libdir, even if not part of LD_LIBRARY_PATH // e.g. because of an RPATH build. 
- fInterpreter->getDynamicLibraryManager()->addSearchPath(TROOT::GetLibDir().Data()); + DLM.addSearchPath(TROOT::GetLibDir().Data()); + auto ShouldPermanentlyIgnore = [](llvm::StringRef FileName) -> bool{ + llvm::StringRef stem = llvm::sys::path::stem(FileName); + return stem.startswith("libNew") || stem.startswith("libcppyy_backend"); + }; + // Initialize the dyld for the llvmLazyFunctionCreator. + DLM.initializeDyld(ShouldPermanentlyIgnore); } } @@ -1584,69 +1586,6 @@ void TCling::ShutDown() ResetGlobals(); } -//////////////////////////////////////////////////////////////////////////////// -/// Wrapper around dladdr (and friends) - -static std::string FindLibraryName(void (*func)()) -{ -#if defined(__CYGWIN__) && defined(__GNUC__) - return {}; -#elif defined(G__WIN32) - MEMORY_BASIC_INFORMATION mbi; - if (!VirtualQuery (func, &mbi, sizeof (mbi))) - { - return {}; - } - - HMODULE hMod = (HMODULE) mbi.AllocationBase; - char moduleName[MAX_PATH]; - - if (!GetModuleFileNameA (hMod, moduleName, sizeof (moduleName))) - { - return {}; - } - return ROOT::TMetaUtils::GetRealPath(moduleName); -#else - Dl_info info; - if (dladdr((void*)func, &info) == 0) { - // Not in a known shared library, let's give up - return {}; - } else { - if (strchr(info.dli_fname, '/')) - return ROOT::TMetaUtils::GetRealPath(info.dli_fname); - // Else absolute path. For all we know that's a binary. - // Some people have dictionaries in binaries, this is how we find their path: - // (see also https://stackoverflow.com/a/1024937/6182509) -# if defined(R__MACOSX) - char buf[PATH_MAX] = { 0 }; - uint32_t bufsize = sizeof(buf); - if (_NSGetExecutablePath(buf, &bufsize) >= 0) - return ROOT::TMetaUtils::GetRealPath(buf); - return ROOT::TMetaUtils::GetRealPath(info.dli_fname); -# elif defined(R__UNIX) - char buf[PATH_MAX] = { 0 }; - // Cross our fingers that /proc/self/exe exists. 
- if (readlink("/proc/self/exe", buf, sizeof(buf)) > 0) - return ROOT::TMetaUtils::GetRealPath(buf); - std::string pipeCmd = std::string("which \"") + info.dli_fname + "\""; - FILE* pipe = popen(pipeCmd.c_str(), "r"); - if (!pipe) - return ROOT::TMetaUtils::GetRealPath(info.dli_fname); - std::string result; - while (fgets(buf, sizeof(buf), pipe)) { - result += buf; - } - pclose(pipe); - return ROOT::TMetaUtils::GetRealPath(result); -# else -# error "Unsupported platform." -# endif - return {}; - } -#endif - -} - //////////////////////////////////////////////////////////////////////////////// /// Helper to initialize TVirtualStreamerInfo's factor early. /// Use static initialization to insure only one TStreamerInfo is created. @@ -2068,7 +2007,7 @@ void TCling::RegisterModule(const char* modulename, if (payloadCode) code += payloadCode; - std::string dyLibName = FindLibraryName(triggerFunc); + std::string dyLibName = cling::DynamicLibraryManager::getSymbolLocation(triggerFunc); assert(!llvm::sys::fs::is_symlink_file(dyLibName)); if (dyLibName.empty()) { @@ -6243,260 +6182,42 @@ bool TCling::LibraryLoadingFailed(const std::string& errmessage, const std::stri return false; } -// This is a GNU implementation of hash used in bloom filter! -static uint32_t GNUHash(StringRef S) { - uint32_t H = 5381; - for (uint8_t C : S) - H = (H << 5) + H + C; - return H; -} - -static StringRef GetGnuHashSection(llvm::object::ObjectFile *file) { - for (auto S : file->sections()) { - StringRef name; - S.getName(name); - if (name == ".gnu.hash") { - StringRef content; - S.getContents(content); - return content; - } - } - return ""; -} - -/// Bloom filter in a stohastic data structure which can tell us if a symbol -/// name does not exist in a library with 100% certainty. 
If it tells us it exists -/// this may not be true: https://blogs.oracle.com/solaris/gnu-hash-elf-sections-v2 -/// -/// ELF has this optimization in the new linkers by default, It is stored in the -/// gnu.hash section of the object file. -/// -///\returns true true if the symbol may be in the library. -static bool MayExistInObjectFile(llvm::object::ObjectFile *soFile, uint32_t hash) { - if (!soFile->isELF()) - return true; - - // LLVM9: soFile->makeTriple().is64Bit() - const int bits = 8 * soFile->getBytesInAddress(); - - StringRef contents = GetGnuHashSection(soFile); - if (contents.size() < 16) - // We need to search if the library doesn't have .gnu.hash section! - return true; - const char* hashContent = contents.data(); - - // See https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/ for .gnu.hash table layout. - uint32_t maskWords = *reinterpret_cast(hashContent + 8); - uint32_t shift2 = *reinterpret_cast(hashContent + 12); - uint32_t hash2 = hash >> shift2; - uint32_t n = (hash / bits) % maskWords; - - const char *bloomfilter = hashContent + 16; - const char *hash_pos = bloomfilter + n*(bits/8); // * (Bits / 8) - uint64_t word = *reinterpret_cast(hash_pos); - uint64_t bitmask = ( (1ULL << (hash % bits)) | (1ULL << (hash2 % bits))); - return (bitmask & word) == bitmask; -} - -/// Looks up symbols from a an object file, representing the library. -///\returns true on success. -static bool FindSymbol(const std::string &library_filename, - const std::string &mangled_name, unsigned IgnoreSymbolFlags = 0) -{ - auto ObjF = llvm::object::ObjectFile::createObjectFile(ROOT::TMetaUtils::GetRealPath(library_filename)); - if (!ObjF) { - if (gDebug > 1) - Warning("TCling__FindSymbol", "Failed to read object file %s", library_filename.c_str()); - return false; - } - - llvm::object::ObjectFile *BinObjFile = ObjF.get().getBinary(); - - uint32_t hashedMangle = GNUHash(mangled_name); - // If the symbol does not exist, exit early. In case it may exist, iterate. 
- if (!MayExistInObjectFile(BinObjFile, hashedMangle)) - return false; - - for (const auto &S : BinObjFile->symbols()) { - uint32_t Flags = S.getFlags(); - // Do not insert in the table symbols flagged to ignore. - if (Flags & IgnoreSymbolFlags) - continue; - - // Note, we are at last resort and loading library based on a weak - // symbol is allowed. Otherwise, the JIT will issue an unresolved - // symbol error. - // - // There are other weak symbol kinds (marked as 'V') to denote - // typeinfo and vtables. It is unclear whether we should load such - // libraries or from which library we should resolve the symbol. - // We seem to not have a way to differentiate it from the symbol API. - - llvm::Expected SymNameErr = S.getName(); - if (!SymNameErr) { - Warning("TCling__FindSymbol", "Failed to read symbol %s", mangled_name.c_str()); - continue; - } - - if (SymNameErr.get() == mangled_name) { - if (gDebug > 1) - Info("TCling__FindSymbol", "Symbol %s found in %s\n", - mangled_name.c_str(), library_filename.c_str()); - return true; - } - } - - if (!BinObjFile->isELF()) - return false; - - // ELF file format has .dynstr section for the dynamic symbol table. - const auto *ElfObj = cast(BinObjFile); - - for (const auto &S : ElfObj->getDynamicSymbolIterators()) { - uint32_t Flags = S.getFlags(); - // DO NOT insert to table if symbol was undefined - if (Flags & llvm::object::SymbolRef::SF_Undefined) - continue; - - // Note, we are at last resort and loading library based on a weak - // symbol is allowed. Otherwise, the JIT will issue an unresolved - // symbol error. - // - // There are other weak symbol kinds (marked as 'V') to denote - // typeinfo and vtables. It is unclear whether we should load such - // libraries or from which library we should resolve the symbol. - // We seem to not have a way to differentiate it from the symbol API. 
- - llvm::Expected SymNameErr = S.getName(); - if (!SymNameErr) { - Warning("TCling__FindSymbol", "Failed to read symbol %s", mangled_name.c_str()); - continue; - } - - if (SymNameErr.get() == mangled_name) - return true; - } - - return false; -} - -static std::string ResolveSymbol(const std::string& mangled_name, - cling::Interpreter *interp, - bool searchSystem = true) { - assert(!mangled_name.empty()); - using namespace llvm::sys::path; - using namespace llvm::sys::fs; - +static void* LazyFunctionCreatorAutoloadForModule(const std::string &mangled_name, + const cling::DynamicLibraryManager &DLM) { R__LOCKGUARD(gInterpreterMutex); - static bool sFirstRun = true; - static bool sFirstSystemLibrary = true; - // LibraryPath contains a pair offset to the canonical dirname (stored as - // sPaths[i]) and a filename. For example, `/home/foo/root/lib/libTMVA.so`, - // the .first will contain an index in sPaths where `/home/foo/root/lib/` - // will be stored and .second `libTMVA.so`. - // This approach reduces the duplicate paths as at one location there may be - // plenty of libraries. - using LibraryPath = std::pair; - using LibraryPaths = std::vector; - using BasePaths = std::vector; - static LibraryPaths sLibraries; - static BasePaths sPaths; - static LibraryPaths sQueriedLibraries; - - // For system header AutoLoading - static LibraryPaths sSysLibraries; - - if (sFirstRun) { - TCling__FindLoadedLibraries(sLibraries, sPaths, *interp, /* searchSystem */ false); - sFirstRun = false; - } - - auto GetLibFileName = [](const LibraryPath &P, const BasePaths &BaseP) { - llvm::SmallString<512> Vec(BaseP[P.first]); - llvm::sys::path::append(Vec, StringRef(P.second)); - return Vec.str().str(); + auto LibLoader = [](const std::string& LibName) -> bool { + if (gSystem->Load(LibName.c_str(), "", false) < 0) { + Error("TCling__LazyFunctionCreatorAutoloadForModule", + "Failed to load library %s", LibName.c_str()); + return false; + } + return true; //success. 
}; - if (!sQueriedLibraries.empty()) { - // Last call we were asked if a library contains a symbol. Usually, the - // caller wants to load this library. Check if was loaded and remove it - // from our lists of not-yet-loaded libs. - for (const LibraryPath &P : sQueriedLibraries) { - const std::string LibName = GetLibFileName(P, sPaths); - if (!gCling->IsLibraryLoaded(LibName.c_str())) - continue; - - sLibraries.erase(std::remove(sLibraries.begin(), sLibraries.end(), P), sLibraries.end()); - if (!sSysLibraries.empty()) - sSysLibraries.erase(std::remove(sSysLibraries.begin(), sSysLibraries.end(), P), sSysLibraries.end()); - } - } - - if (sFirstRun) { - TCling__FindLoadedLibraries(sLibraries, sPaths, *interp, /* searchSystem */ false); - sFirstRun = false; - } - - // Iterate over files under this path. We want to get each ".so" files - for (const LibraryPath &P : sLibraries) { - const std::string LibName = GetLibFileName(P, sPaths); - - // FIXME: We should also iterate over the dynamic symbols for ROOT - // libraries. However, it seems to be redundant for the moment as we do - // not strictly require symbols from those sections. Enable after checking - // performance! - if (FindSymbol(LibName, mangled_name, /*ignore*/ - llvm::object::SymbolRef::SF_Undefined)) { - sQueriedLibraries.push_back(P); - return LibName; - } - } - - if (!searchSystem) - return ""; - - // Lookup in non-system libraries failed. Expand the search to the system. - if (sFirstSystemLibrary) { - TCling__FindLoadedLibraries(sSysLibraries, sPaths, *interp, /* searchSystem */ true); - sFirstSystemLibrary = false; - } - - for (const LibraryPath &P : sSysLibraries) { - const std::string LibName = GetLibFileName(P, sPaths); - - if (FindSymbol(LibName, mangled_name, /*ignore*/ - llvm::object::SymbolRef::SF_Undefined | - llvm::object::SymbolRef::SF_Weak)) { - sQueriedLibraries.push_back(P); - return LibName; - } - } - - return ""; // Search found no match. 
-} - -static void* LazyFunctionCreatorAutoloadForModule(const std::string &mangled_name, - cling::Interpreter *interp) { -// The JIT gives us a mangled name which has only one leading underscore on -// all platforms, for instance _ZN8TRandom34RndmEv. However, on OSX the -// linker stores this symbol as __ZN8TRandom34RndmEv (adding an extra _). - std::string maybe_prefixed_mangled_name = mangled_name; #ifdef R__MACOSX + // The JIT gives us a mangled name which has only one leading underscore on + // all platforms, for instance _ZN8TRandom34RndmEv. However, on OSX the + // linker stores this symbol as __ZN8TRandom34RndmEv (adding an extra _). assert(!llvm::StringRef(mangled_name).startswith("__") && "Already added!"); - maybe_prefixed_mangled_name = "_" + maybe_prefixed_mangled_name; -#endif + std::string libName = DLM.searchLibrariesForSymbol('_' + mangled_name, + /*searchSystem=*/ true); +#else + std::string libName = DLM.searchLibrariesForSymbol(mangled_name, + /*searchSystem=*/ true); +#endif //R__MACOSX - std::string LibName = ResolveSymbol(maybe_prefixed_mangled_name, interp); - if (LibName.empty()) + assert(!llvm::StringRef(libName).startswith("libNew") && + "We must not resolve symbols from libNew!"); + + if (libName.empty()) return nullptr; - if (gSystem->Load(LibName.c_str(), "", false) < 0) - Error("TCling__LazyFunctionCreatorAutoloadForModule", - "Failed to load library %s", LibName.c_str()); + if (!LibLoader(libName)) + return nullptr; + + return llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(mangled_name); - void* addr = llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(mangled_name.c_str()); - return addr; } //////////////////////////////////////////////////////////////////////////////// @@ -6504,7 +6225,8 @@ static void* LazyFunctionCreatorAutoloadForModule(const std::string &mangled_nam void* TCling::LazyFunctionCreatorAutoload(const std::string& mangled_name) { if (fCxxModulesEnabled) - return 
LazyFunctionCreatorAutoloadForModule(mangled_name, GetInterpreterImpl()); + return LazyFunctionCreatorAutoloadForModule(mangled_name, + *GetInterpreterImpl()->getDynamicLibraryManager()); // First see whether the symbol is in the library that we are currently // loading. It will have access to the symbols of its dependent libraries, @@ -7157,7 +6879,7 @@ static std::string GetSharedLibImmediateDepsSlow(std::string lib, if (BinObjFile->isELF()) { // Skip the symbols which are part of the C/C++ runtime and have a // fixed library version. See binutils ld VERSION. Those reside in - // 'system' libraries, which we avoid in ResolveSymbol. + // 'system' libraries, which we avoid in FindLibraryForSymbol. if (SymName.contains("@@GLIBCXX") || SymName.contains("@@CXXABI") || SymName.contains("@@GLIBC") || SymName.contains("@@GCC")) continue; @@ -7177,7 +6899,8 @@ static std::string GetSharedLibImmediateDepsSlow(std::string lib, if (skipLoadedLibs && llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(SymName)) continue; - std::string found = ResolveSymbol(SymName, interp, /*searchSystem*/false); + R__LOCKGUARD(gInterpreterMutex); + std::string found = interp->getDynamicLibraryManager()->searchLibrariesForSymbol(SymName, /*searchSystem*/false); // The expected output is just filename without the full path, which // is not very accurate, because our Dyld implementation might find // a match in location a/b/c.so and if we return just c.so ROOT might diff --git a/core/metacling/src/TClingCallbacks.cxx b/core/metacling/src/TClingCallbacks.cxx index 1a5a4247dca13..74101dc581af8 100644 --- a/core/metacling/src/TClingCallbacks.cxx +++ b/core/metacling/src/TClingCallbacks.cxx @@ -34,6 +34,8 @@ #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/ObjectFile.h" + +#include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" @@ -74,9 +76,6 @@ extern "C" { void TCling__RestoreInterpreterMutex(void *state); void 
*TCling__LockCompilationDuringUserCodeExecution(); void TCling__UnlockCompilationDuringUserCodeExecution(void *state); - void TCling__FindLoadedLibraries(std::vector> &sLibraries, - std::vector &sPaths, - cling::Interpreter &interpreter, bool searchSystem); } TClingCallbacks::TClingCallbacks(cling::Interpreter *interp, bool hasCodeGen) : InterpreterCallbacks(interp) @@ -921,103 +920,3 @@ void TClingCallbacks::UnlockCompilationDuringUserCodeExecution(void *StateInfo) { TCling__UnlockCompilationDuringUserCodeExecution(StateInfo); } - -static bool shouldIgnore(const std::string& FileName, - const cling::DynamicLibraryManager& dyLibManager) { - if (llvm::sys::fs::is_directory(FileName)) - return true; - - if (!cling::DynamicLibraryManager::isSharedLibrary(FileName)) - return true; - - // No need to check linked libraries, as this function is only invoked - // for symbols that cannot be found (neither by dlsym nor in the JIT). - if (dyLibManager.isLibraryLoaded(FileName.c_str())) - return true; - - - auto ObjF = llvm::object::ObjectFile::createObjectFile(FileName); - if (!ObjF) { - if (gDebug > 1) - ROOT::TMetaUtils::Warning("[DyLD]", "Failed to read object file %s", - FileName.c_str()); - return true; - } - - llvm::object::ObjectFile *file = ObjF.get().getBinary(); - - if (isa(*file)) { - for (auto S : file->sections()) { - StringRef name; - S.getName(name); - if (name == ".text") { - // Check if the library has only debug symbols, usually when - // stripped with objcopy --only-keep-debug. This check is done by - // reading the manual of objcopy and inspection of stripped with - // objcopy libraries. - auto SecRef = static_cast(S); - if (SecRef.getType() == llvm::ELF::SHT_NOBITS) - return true; - - return (SecRef.getFlags() & llvm::ELF::SHF_ALLOC) == 0; - } - } - return true; - } - //FIXME: Handle osx using isStripped after upgrading to llvm9. 
- - llvm::StringRef fileStem = llvm::sys::path::stem(FileName); - return fileStem.startswith("libNew") || fileStem.startswith("libcppyy_backend"); -} - -static void SearchAndAddPath(const std::string& Path, - std::vector> &sLibraries, std::vector &sPaths, - std::unordered_set& alreadyLookedPath, cling::DynamicLibraryManager* dyLibManager) -{ - // Already searched? - auto it = alreadyLookedPath.insert(Path); - if (!it.second) - return; - StringRef DirPath(Path); - if (!llvm::sys::fs::is_directory(DirPath)) - return; - - bool flag = false; - std::error_code EC; - for (llvm::sys::fs::directory_iterator DirIt(DirPath, EC), DirEnd; - DirIt != DirEnd && !EC; DirIt.increment(EC)) { - - std::string FileName(DirIt->path()); - if (shouldIgnore(FileName, *dyLibManager)) - continue; - - sLibraries.push_back(std::make_pair(sPaths.size(), llvm::sys::path::filename(FileName))); - flag = true; - } - - if (flag) - sPaths.push_back(Path); -} - -// Extracted here to circumvent ODR clash between -// std::Sp_counted_ptr_inplace, (_gnu_cxx::_Lock_policy)2>::_M_get_deleter(std::type_info const&) -// coming from a no-rtti and a rtti build in libstdc++ from GCC >= 8.1. -// In its function body, rtti uses `arg0 == typeid(...)` protected by #ifdef __cpp_rtti. Depending -// on which symbol (with or without rtti) the linker picks up, the argument `arg0` is a valid -// type_info - or not, in which case this comparison crashes. -// Circumvent this by removing the rtti-use of this function: -void TCling__FindLoadedLibraries(std::vector> &sLibraries, - std::vector &sPaths, - cling::Interpreter &interpreter, bool searchSystem) -{ - // Store the information of path so that we don't have to iterate over the same path again and again. 
- static std::unordered_set alreadyLookedPath; - cling::DynamicLibraryManager* dyLibManager = interpreter.getDynamicLibraryManager(); - - const auto &searchPaths = dyLibManager->getSearchPath(); - for (const cling::DynamicLibraryManager::SearchPathInfo &Info : searchPaths) { - if (!Info.IsUser && !searchSystem) - continue; - SearchAndAddPath(Info.Path, sLibraries, sPaths, alreadyLookedPath, dyLibManager); - } -} diff --git a/interpreter/cling/include/cling/Interpreter/DynamicLibraryManager.h b/interpreter/cling/include/cling/Interpreter/DynamicLibraryManager.h index 4821fe60f7063..a668a9c19edfc 100644 --- a/interpreter/cling/include/cling/Interpreter/DynamicLibraryManager.h +++ b/interpreter/cling/include/cling/Interpreter/DynamicLibraryManager.h @@ -11,6 +11,7 @@ #define CLING_DYNAMIC_LIBRARY_MANAGER_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" @@ -43,7 +44,12 @@ namespace cling { /// True if the Path is on the LD_LIBRARY_PATH. /// bool IsUser; + + bool operator==(const SearchPathInfo& Other) const { + return IsUser == Other.IsUser && Path == Other.Path; + } }; + using SearchPathInfos = llvm::SmallVector; private: typedef const void* DyLibHandle; typedef llvm::DenseMap DyLibs; @@ -58,7 +64,7 @@ namespace cling { ///\brief System's include path, get initialized at construction time. /// - llvm::SmallVector m_SearchPaths; + SearchPathInfos m_SearchPaths; InterpreterCallbacks* m_Callbacks; @@ -80,6 +86,11 @@ namespace cling { ///\returns the canonical path to the file or empty string if not found /// std::string lookupLibMaybeAddExt(llvm::StringRef filename) const; + + /// On a success returns to full path to a shared object that holds the + /// symbol pointed by func. 
+ /// + static std::string getSymbolLocation(void* func); public: DynamicLibraryManager(const InvocationOptions& Opts); ~DynamicLibraryManager(); @@ -91,7 +102,7 @@ namespace cling { /// ///\returns System include paths. /// - const llvm::SmallVectorImpl& getSearchPath() { + const SearchPathInfos& getSearchPaths() const { return m_SearchPaths; } @@ -128,6 +139,26 @@ namespace cling { /// bool isLibraryLoaded(llvm::StringRef fullPath) const; + /// Initialize the dyld. + /// + ///\param [in] shouldPermanentlyIgnore - a callback deciding if a library + /// should be ignored from the result set. Useful for ignoring + /// dangerous libraries such as the ones overriding malloc. + /// + void + initializeDyld(std::function shouldPermanentlyIgnore) + const; + + /// Find the first not-yet-loaded shared object that contains the symbol + /// + ///\param[in] mangledName - the mangled name to look for. + ///\param[in] searchSystem - whether to decend into system libraries. + /// + ///\returns the library name if found, and empty string otherwise. + /// + std::string searchLibrariesForSymbol(const std::string& mangledName, + bool searchSystem = true) const; + ///\brief Explicitly tell the execution engine to use symbols from /// a shared library that would otherwise not be used for symbol /// resolution, e.g. because it was dlopened with RTLD_LOCAL. @@ -145,6 +176,15 @@ namespace cling { /// is a library but of incompatible file format. /// static bool isSharedLibrary(llvm::StringRef libFullPath, bool* exists = 0); + + /// On a success returns to full path to a shared object that holds the + /// symbol pointed by func. 
+ /// + template + static std::string getSymbolLocation(T func) { + static_assert(std::is_pointer::value, "Must be a function pointer!"); + return getSymbolLocation(reinterpret_cast(func)); + } }; } // end namespace cling #endif // CLING_DYNAMIC_LIBRARY_MANAGER_H diff --git a/interpreter/cling/lib/Interpreter/CMakeLists.txt b/interpreter/cling/lib/Interpreter/CMakeLists.txt index 9d21ed460ccc6..c68ca4d3ed671 100644 --- a/interpreter/cling/lib/Interpreter/CMakeLists.txt +++ b/interpreter/cling/lib/Interpreter/CMakeLists.txt @@ -76,6 +76,7 @@ add_cling_library(clingInterpreter OBJECT DeclUnloader.cpp DeviceKernelInliner.cpp DynamicLibraryManager.cpp + DynamicLibraryManagerSymbol.cpp DynamicLookup.cpp DynamicExprInfo.cpp Exception.cpp diff --git a/interpreter/cling/lib/Interpreter/DynamicLibraryManagerSymbol.cpp b/interpreter/cling/lib/Interpreter/DynamicLibraryManagerSymbol.cpp new file mode 100644 index 0000000000000..3192d96c889e2 --- /dev/null +++ b/interpreter/cling/lib/Interpreter/DynamicLibraryManagerSymbol.cpp @@ -0,0 +1,961 @@ +//------------------------------------------------------------------------------ +// CLING - the C++ LLVM-based InterpreterG :) +// author: Vassil Vassilev +// author: Alexander Penev +// +// This file is dual-licensed: you can choose to license it under the University +// of Illinois Open Source License or the GNU Lesser General Public License. See +// LICENSE.TXT for details. 
+//------------------------------------------------------------------------------ + +#include "cling/Interpreter/DynamicLibraryManager.h" +#include "cling/Utils/Output.h" + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/Path.h" + +#include +#include +#include +#include +#include + +#include "llvm/Config/config.h" // Get configuration settings + +#if defined(HAVE_DLFCN_H) && defined(HAVE_DLOPEN) +#include +#endif + +#ifdef HAVE_UNISTD_H +#include +#endif // HAVE_UNISTD_H + +#ifdef __APPLE__ +#include +#endif // __APPLE__ + +#ifdef LLVM_ON_WIN32 +#include // For GetModuleFileNameA +#include // For VirtualQuery +#endif + +// FIXME: Implement debugging output stream in cling. +constexpr unsigned DEBUG = 0; + +namespace { + +using BasePath = std::string; + +// This is a GNU implementation of hash used in bloom filter! +static uint32_t GNUHash(llvm::StringRef S) { + uint32_t H = 5381; + for (uint8_t C : S) + H = (H << 5) + H + C; + return H; +} + +constexpr uint32_t log2u(std::uint32_t n) { + return (n > 1) ? 
1 + log2u(n >> 1) : 0;
+}
+
+/// A two-hash bloom filter over 32-bit symbol hashes.
+///
+/// Every hash sets (AddHash) or tests (TestHash) two bits inside a single
+/// 64-bit word of m_BloomTable. A negative TestHash answer means the hash was
+/// never added; a positive answer only means it may have been added.
+struct BloomFilter {
+
+  // https://hur.st/bloomfilter
+  //
+  // n = ceil(m / (-k / log(1 - exp(log(p) / k))))
+  // p = pow(1 - exp(-k / (m / n)), k)
+  // m = ceil((n * log(p)) / log(1 / pow(2, log(2))));
+  // k = round((m / n) * log(2));
+  //
+  // n = symbolsCount
+  // p = 0.02
+  // k = 2 (k1=GNUHash and k2=GNUHash >> bloomShift)
+  // m = ceil((symbolsCount * log(p)) / log(1 / pow(2, log(2))));
+  // bloomShift = min(5 for bits=32 or 6 for bits=64, log2(symbolsCount))
+  // bloomSize = ceil((-1.44 * n * log2f(p)) / bits)
+
+  // Number of bits in one table word.
+  const int m_Bits = 8 * sizeof(uint64_t);
+  // Target false positive probability used to size the table.
+  const float m_P = 0.02;
+
+  bool m_IsInitialized = false;
+  uint32_t m_SymbolsCount = 0;
+  // Number of 64-bit words in m_BloomTable.
+  uint32_t m_BloomSize = 0;
+  // Right shift applied to a hash to derive the second bit position.
+  uint32_t m_BloomShift = 0;
+  // The masks built below are uint64_t, hence the element type.
+  std::vector<uint64_t> m_BloomTable;
+
+  /// \returns true if both bits derived from \p hash are set, i.e. the hash
+  /// may have been added; false if it definitely was not.
+  bool TestHash(uint32_t hash) const {
+    // This function is superhot. No branches here, breaks inlining and makes
+    // overall performance around 4x slower.
+    assert(m_IsInitialized && "Not yet initialized!");
+    // A table sized for zero symbols has no words; the modulo below would
+    // divide by zero.
+    assert(m_BloomSize != 0 && "Empty bloom table!");
+    uint32_t hash2 = hash >> m_BloomShift;
+    uint32_t n = (hash >> log2u(m_Bits)) % m_BloomSize;
+    uint64_t mask = ((1ULL << (hash % m_Bits)) | (1ULL << (hash2 % m_Bits)));
+    return (mask & m_BloomTable[n]) == mask;
+  }
+
+  /// Records \p hash by setting its two bits in the selected table word.
+  void AddHash(uint32_t hash) {
+    assert(m_IsInitialized && "Not yet initialized!");
+    // See TestHash: an empty table cannot be indexed.
+    assert(m_BloomSize != 0 && "Empty bloom table!");
+    uint32_t hash2 = hash >> m_BloomShift;
+    uint32_t n = (hash >> log2u(m_Bits)) % m_BloomSize;
+    uint64_t mask = ((1ULL << (hash % m_Bits)) | (1ULL << (hash2 % m_Bits)));
+    m_BloomTable[n] |= mask;
+  }
+
+  /// Sizes the table for \p newSymbolsCount symbols. Only supported once, on
+  /// a filter that has not been sized yet. A count of zero yields an empty
+  /// table.
+  void ResizeTable(uint32_t newSymbolsCount) {
+    assert(m_SymbolsCount == 0 && "Not supported yet!");
+    m_SymbolsCount = newSymbolsCount;
+    m_BloomSize = ceil((-1.44f * m_SymbolsCount * log2f(m_P)) / m_Bits);
+    m_BloomShift = std::min(6u, log2u(m_SymbolsCount));
+    m_BloomTable.resize(m_BloomSize);
+  }
+
+};
+
+
+/// An efficient representation of a full path to a library which does not
+/// duplicate common path patterns reducing the overall memory footprint.
+///
+/// For example, for `/home/.../lib/libA.so`, m_Path refers to the shared base
+/// path `/home/.../lib/` and m_LibName holds `libA.so`.
+/// This approach reduces the duplicate paths as at one location there may be
+/// plenty of libraries.
+struct LibraryPath {
+  // Base directory; a reference to a string owned elsewhere.
+  const BasePath& m_Path;
+  // File name of the library inside m_Path.
+  std::string m_LibName;
+  // Bloom filter over the hashes of the library's symbols.
+  BloomFilter m_Filter;
+  // Exact set of symbol names recorded for the library.
+  llvm::StringSet<> m_Symbols;
+
+  LibraryPath(const BasePath& Path, const std::string& LibName)
+    : m_Path(Path), m_LibName(LibName) { }
+
+  /// Two entries are equal when they name the same file in the same base
+  /// path; the address comparison is a shortcut for shared base paths.
+  bool operator==(const LibraryPath &other) const {
+    return (&m_Path == &other.m_Path || m_Path == other.m_Path) &&
+      m_LibName == other.m_LibName;
+  }
+
+  /// \returns the base path joined with the library file name.
+  std::string GetFullName() const {
+    llvm::SmallString<512> Vec(m_Path);
+    llvm::sys::path::append(Vec, llvm::StringRef(m_LibName));
+    return Vec.str().str();
+  }
+
+  /// Adds the GNU hash of \p symbol to the bloom filter.
+  void AddBloom(llvm::StringRef symbol) {
+    m_Filter.AddHash(GNUHash(symbol));
+  }
+
+  /// Stores \p symbol in the exact symbol set.
+  /// \returns a reference to the key owned by the set.
+  llvm::StringRef AddSymbol(const std::string& symbol) {
+    auto it = m_Symbols.insert(symbol);
+    return it.first->getKey();
+  }
+
+  /// \returns true if the (initialized) filter was sized for zero symbols.
+  bool isBloomFilterEmpty() const {
+    assert(m_Filter.m_IsInitialized && "Bloom filter not initialized!");
+    return m_Filter.m_SymbolsCount == 0;
+  }
+
+  /// Marks the filter as initialized and sizes it for \p newSymbolsCount.
+  void InitializeBloomFilter(uint32_t newSymbolsCount) {
+    // Do not call isBloomFilterEmpty() here: it asserts that the filter is
+    // already initialized, which is exactly what must not hold yet, so the
+    // check would abort every assert-enabled build. ResizeTable() still
+    // asserts that no symbols were counted before.
+    assert(!m_Filter.m_IsInitialized &&
+           "Cannot re-initialize non-empty filter!");
+    m_Filter.m_IsInitialized = true;
+    m_Filter.ResizeTable(newSymbolsCount);
+  }
+
+  /// \returns false if \p hash is definitely not among the added hashes,
+  /// true if it may be.
+  bool MayExistSymbol(uint32_t hash) const {
+    // The library had no symbols and the bloom filter is empty.
+    if (isBloomFilterEmpty())
+      return false;
+
+    return m_Filter.TestHash(hash);
+  }
+
+  /// \returns true if \p symbol was recorded via AddSymbol.
+  bool ExistSymbol(llvm::StringRef symbol) const {
+    return m_Symbols.find(symbol) != m_Symbols.end();
+  }
+};
+
+
+/// A helper class keeping track of loaded libraries. It implements a fast
+/// search O(1) while keeping deterministic iterability in a memory efficient
+/// way. 
The underlying set uses a custom hasher for better efficiency given the +/// specific problem where the library names (m_LibName) are relatively short +/// strings and the base paths (m_Path) are repetitive long strings. +class LibraryPaths { + struct LibraryPathHashFn { + size_t operator()(const LibraryPath& item) const { + return std::hash()(item.m_Path.length()) ^ + std::hash()(item.m_LibName); + } + }; + + std::vector m_Libs; + std::unordered_set m_LibsH; +public: + bool HasRegisteredLib(const LibraryPath& Lib) const { + return m_LibsH.count(Lib); + } + + void RegisterLib(const LibraryPath& Lib) { + auto it = m_LibsH.insert(Lib); + assert(it.second && "Already registered!"); + m_Libs.push_back(&*it.first); + } + + void UnregisterLib(const LibraryPath& Lib) { + auto found = m_LibsH.find(Lib); + if (found == m_LibsH.end()) + return; + + m_Libs.erase(std::find(m_Libs.begin(), m_Libs.end(), &*found)); + m_LibsH.erase(found); + } + + size_t size() const { + assert(m_Libs.size() == m_LibsH.size()); + return m_Libs.size(); + } + + const std::vector& GetLibraries() const { + return m_Libs; + } +}; + +class Dyld { + + struct BasePathHashFunction { + size_t operator()(const BasePath& item) const { + return std::hash()(item); + } + }; + + struct BasePathEqFunction { + size_t operator()(const BasePath& l, const BasePath& r) const { + return &l == &r || l == r; + } + }; + /// A memory efficient llvm::VectorSet. The class provides O(1) search + /// complexity. It is tuned to compare BasePaths first by checking the + /// address and then the representation which models the base path reuse. 
+ class BasePaths { + std::unordered_set m_Paths; + + public: + const BasePath& RegisterBasePath(const std::string& Path, + bool* WasInserted = nullptr) { + auto it = m_Paths.insert(Path); + if (WasInserted) + *WasInserted = it.second; + + return *it.first; + } + + bool Contains (const std::string& Path) { + return m_Paths.count(Path); + } + }; + + bool m_FirstRun = true; + bool m_FirstRunSysLib = true; + bool m_UseBloomFilter = true; + bool m_UseHashTable = true; + + const cling::DynamicLibraryManager& m_DynamicLibraryManager; + + /// The basename of `/home/.../lib/libA.so`, + /// m_BasePaths will contain `/home/.../lib/` + BasePaths m_BasePaths; + + LibraryPaths m_Libraries; + LibraryPaths m_SysLibraries; + /// Contains a set of libraries which we gave to the user via ResolveSymbol + /// call and next time we should check if the user loaded them to avoid + /// useless iterations. + std::vector m_QueriedLibraries; + + /// Scan for shared objects which are not yet loaded. They are a our symbol + /// resolution candidate sources. + /// NOTE: We only scan not loaded shared objects. + /// \param[in] searchSystemLibraries - whether to decent to standard system + /// locations for shared objects. + void ScanForLibraries(bool searchSystemLibraries = false); + + /// Builds a bloom filter lookup optimization. + void BuildBloomFilter(LibraryPath* Lib, llvm::object::ObjectFile *BinObjFile, + unsigned IgnoreSymbolFlags = 0) const; + + + /// Looks up symbols from a an object file, representing the library. + ///\param[in] Lib - full path to the library. + ///\param[in] mangledName - the mangled name to look for. + ///\param[in] IgnoreSymbolFlags - The symbols to ignore upon a match. + ///\returns true on success. 
+ bool ContainsSymbol(const LibraryPath* Lib, const std::string &mangledName, + unsigned IgnoreSymbolFlags = 0) const; + +protected: + Dyld(const cling::DynamicLibraryManager &DLM) + : m_DynamicLibraryManager(DLM) { } + + ~Dyld() = default; + +public: + static Dyld& getInstance(const cling::DynamicLibraryManager &DLM) { + static Dyld instance(DLM); + +#ifndef NDEBUG + auto &NewSearchPaths = DLM.getSearchPaths(); + auto &OldSearchPaths = instance.m_DynamicLibraryManager.getSearchPaths(); + // FIXME: Move the Dyld logic to the cling::DynamicLibraryManager itself! + assert(std::equal(OldSearchPaths.begin(), OldSearchPaths.end(), + NewSearchPaths.begin()) && "Path was added/removed!"); +#endif + + return instance; + } + + // delete copy and move constructors and assign operators + Dyld(Dyld const&) = delete; + Dyld(Dyld&&) = delete; + Dyld& operator=(Dyld const&) = delete; + Dyld& operator=(Dyld &&) = delete; + + std::string searchLibrariesForSymbol(const std::string& mangledName, + bool searchSystem); +}; + + +static bool s_IsDyldInitialized = false; +static std::function s_ShouldPermanentlyIgnoreCallback; + + +static std::string getRealPath(llvm::StringRef path) { + llvm::SmallString<512> realPath; + llvm::sys::fs::real_path(path, realPath, /*expandTilde*/true); + return realPath.str().str(); +} + +static llvm::StringRef s_ExecutableFormat; + +static bool shouldPermanentlyIgnore(const std::string& FileName, + const cling::DynamicLibraryManager& dyLibManager) { + assert(FileName == getRealPath(FileName)); + assert(!s_ExecutableFormat.empty() && "Failed to find the object format!"); + + if (llvm::sys::fs::is_directory(FileName)) + return true; + + if (!cling::DynamicLibraryManager::isSharedLibrary(FileName)) + return true; + + // No need to check linked libraries, as this function is only invoked + // for symbols that cannot be found (neither by dlsym nor in the JIT). 
+ if (dyLibManager.isLibraryLoaded(FileName.c_str())) + return true; + + + auto ObjF = llvm::object::ObjectFile::createObjectFile(FileName); + if (!ObjF) { + if (DEBUG > 1) + cling::errs() << "[DyLD] Failed to read object file " + << FileName << "\n"; + return true; + } + + llvm::object::ObjectFile *file = ObjF.get().getBinary(); + + if (DEBUG > 1) + cling::errs() << "Current executable format: " << s_ExecutableFormat + << ". Executable format of " << FileName << " : " + << file->getFileFormatName() << "\n"; + + // Ignore libraries with different format than the executing one. + if (s_ExecutableFormat != file->getFileFormatName()) + return true; + + if (llvm::isa(*file)) { + for (auto S : file->sections()) { + llvm::StringRef name; + S.getName(name); + if (name == ".text") { + // Check if the library has only debug symbols, usually when + // stripped with objcopy --only-keep-debug. This check is done by + // reading the manual of objcopy and inspection of stripped with + // objcopy libraries. + auto SecRef = static_cast(S); + if (SecRef.getType() == llvm::ELF::SHT_NOBITS) + return true; + + return (SecRef.getFlags() & llvm::ELF::SHF_ALLOC) == 0; + } + } + return true; + } + + //FIXME: Handle osx using isStripped after upgrading to llvm9. 
+ + return s_ShouldPermanentlyIgnoreCallback(FileName); +} + +void Dyld::ScanForLibraries(bool searchSystemLibraries/* = false*/) { + + // #ifndef NDEBUG + // if (!m_FirstRun && !m_FirstRunSysLib) + // assert(0 && "Already initialized"); + // if (m_FirstRun && !m_Libraries->size()) + // assert(0 && "Not initialized but m_Libraries is non-empty!"); + // // assert((m_FirstRun || m_FirstRunSysLib) && (m_Libraries->size() || + // m_SysLibraries.size()) + // // && "Already scanned and initialized!"); + // #endif + + const auto &searchPaths = m_DynamicLibraryManager.getSearchPaths(); + for (const cling::DynamicLibraryManager::SearchPathInfo &Info : searchPaths) { + if (Info.IsUser || searchSystemLibraries) { + // Examples which we should handle. + // File Real + // /lib/1/1.so /lib/1/1.so // file + // /lib/1/2.so->/lib/1/1.so /lib/1/1.so // file local link + // /lib/1/3.so->/lib/3/1.so /lib/3/1.so // file external link + // /lib/2->/lib/1 // path link + // /lib/2/1.so /lib/1/1.so // path link, file + // /lib/2/2.so->/lib/1/1.so /lib/1/1.so // path link, file local link + // /lib/2/3.so->/lib/3/1.so /lib/3/1.so // path link, file external link + // + // /lib/3/1.so + // /lib/3/2.so->/system/lib/s.so + // /lib/3/3.so + // /system/lib/1.so + // + // Paths = /lib/1 : /lib/2 : /lib/3 + + // m_BasePaths = ["/lib/1", "/lib/3", "/system/lib"] + // m_*Libraries = [<0,"1.so">, <1,"1.so">, <2,"s.so">, <1,"3.so">] + std::string RealPath = getRealPath(Info.Path); + llvm::StringRef DirPath(RealPath); + + if (!llvm::sys::fs::is_directory(DirPath) || DirPath.empty()) + continue; + + // Already searched? + bool WasInserted; + m_BasePaths.RegisterBasePath(RealPath, &WasInserted); + + if (!WasInserted) + continue; + + std::error_code EC; + for (llvm::sys::fs::directory_iterator DirIt(DirPath, EC), DirEnd; + DirIt != DirEnd && !EC; DirIt.increment(EC)) { + + // FIXME: Use a StringRef here! 
+ std::string FileName = getRealPath(DirIt->path()); + assert(!llvm::sys::fs::is_symlink_file(FileName)); + + if (shouldPermanentlyIgnore(FileName, m_DynamicLibraryManager)) + continue; + + std::string FileRealPath = llvm::sys::path::parent_path(FileName); + FileName = llvm::sys::path::filename(FileName); + const BasePath& BaseP = m_BasePaths.RegisterBasePath(FileRealPath); + LibraryPath LibPath(BaseP, FileName); + if (m_SysLibraries.HasRegisteredLib(LibPath) || + m_Libraries.HasRegisteredLib(LibPath)) + continue; + + if (searchSystemLibraries) + m_SysLibraries.RegisterLib(LibPath); + else + m_Libraries.RegisterLib(LibPath); + } + } + } +} + +void Dyld::BuildBloomFilter(LibraryPath* Lib, + llvm::object::ObjectFile *BinObjFile, + unsigned IgnoreSymbolFlags /*= 0*/) const { + assert(m_UseBloomFilter && "Bloom filter is disabled"); + assert(Lib->isBloomFilterEmpty() && "Already built!"); + + using namespace llvm; + using namespace llvm::object; + + // If BloomFilter is empty then build it. + // Count Symbols and generate BloomFilter + uint32_t SymbolsCount = 0; + std::list symbols; + for (const llvm::object::SymbolRef &S : BinObjFile->symbols()) { + uint32_t Flags = S.getFlags(); + // Do not insert in the table symbols flagged to ignore. + if (Flags & IgnoreSymbolFlags) + continue; + + // Note, we are at last resort and loading library based on a weak + // symbol is allowed. Otherwise, the JIT will issue an unresolved + // symbol error. + // + // There are other weak symbol kinds (marked as 'V') to denote + // typeinfo and vtables. It is unclear whether we should load such + // libraries or from which library we should resolve the symbol. + // We seem to not have a way to differentiate it from the symbol API. 
+ + llvm::Expected SymNameErr = S.getName(); + if (!SymNameErr) { + cling::errs()<< "Dyld::BuildBloomFilter: Failed to read symbol " + << SymNameErr.get() << "\n"; + continue; + } + + if (SymNameErr.get().empty()) + continue; + + ++SymbolsCount; + symbols.push_back(SymNameErr.get()); + } + + if (BinObjFile->isELF()) { + // ELF file format has .dynstr section for the dynamic symbol table. + const auto *ElfObj = cast(BinObjFile); + + for (const object::SymbolRef &S : ElfObj->getDynamicSymbolIterators()) { + uint32_t Flags = S.getFlags(); + // DO NOT insert to table if symbol was undefined + if (Flags & llvm::object::SymbolRef::SF_Undefined) + continue; + + // Note, we are at last resort and loading library based on a weak + // symbol is allowed. Otherwise, the JIT will issue an unresolved + // symbol error. + // + // There are other weak symbol kinds (marked as 'V') to denote + // typeinfo and vtables. It is unclear whether we should load such + // libraries or from which library we should resolve the symbol. + // We seem to not have a way to differentiate it from the symbol API. 
+ + llvm::Expected SymNameErr = S.getName(); + if (!SymNameErr) { + cling::errs() << "Dyld::BuildBloomFilter: Failed to read symbol " + < 7) + cling::errs() << "Dyld::BuildBloomFilter: No symbols!\n"; + return; + } + + if (DEBUG > 7) { + cling::errs() << "Dyld::BuildBloomFilter: Symbols:\n"; + for (auto it : symbols) + cling::errs() << "Dyld::BuildBloomFilter" << "- " << it << "\n"; + } + + Lib->InitializeBloomFilter(SymbolsCount); + // Generate BloomFilter + for (const auto &S : symbols) { + if (m_UseHashTable) + Lib->AddBloom(Lib->AddSymbol(S)); + else + Lib->AddBloom(S); + } +} + + +static llvm::StringRef GetGnuHashSection(llvm::object::ObjectFile *file) { + for (auto S : file->sections()) { + llvm::StringRef name; + S.getName(name); + if (name == ".gnu.hash") { + llvm::StringRef content; + S.getContents(content); + return content; + } + } + return ""; +} + +/// Bloom filter in a stohastic data structure which can tell us if a symbol +/// name does not exist in a library with 100% certainty. If it tells us it +/// exists this may not be true: +/// https://blogs.oracle.com/solaris/gnu-hash-elf-sections-v2 +/// +/// ELF has this optimization in the new linkers by default, It is stored in the +/// gnu.hash section of the object file. +/// +///\returns true if the symbol may be in the library. +static bool MayExistInElfObjectFile(llvm::object::ObjectFile *soFile, + uint32_t hash) { + assert(soFile->isELF() && "Not ELF"); + + // LLVM9: soFile->makeTriple().is64Bit() + const int bits = 8 * soFile->getBytesInAddress(); + + llvm::StringRef contents = GetGnuHashSection(soFile); + if (contents.size() < 16) + // We need to search if the library doesn't have .gnu.hash section! + return true; + const char* hashContent = contents.data(); + + // See https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/ for .gnu.hash + // table layout. 
+ uint32_t maskWords = *reinterpret_cast(hashContent + 8); + uint32_t shift2 = *reinterpret_cast(hashContent + 12); + uint32_t hash2 = hash >> shift2; + uint32_t n = (hash / bits) % maskWords; + + const char *bloomfilter = hashContent + 16; + const char *hash_pos = bloomfilter + n*(bits/8); // * (Bits / 8) + uint64_t word = *reinterpret_cast(hash_pos); + uint64_t bitmask = ( (1ULL << (hash % bits)) | (1ULL << (hash2 % bits))); + return (bitmask & word) == bitmask; +} + +bool Dyld::ContainsSymbol(const LibraryPath* Lib, + const std::string &mangledName, + unsigned IgnoreSymbolFlags /*= 0*/) const { + const std::string library_filename = Lib->GetFullName(); + + if (DEBUG > 7) { + cling::errs() << "Dyld::ContainsSymbol: Find symbol: lib=" + << library_filename << ", mangled=" + << mangledName << "\n"; + } + + auto ObjF = llvm::object::ObjectFile::createObjectFile(library_filename); + if (llvm::Error Err = ObjF.takeError()) { + if (DEBUG > 1) { + std::string Message; + handleAllErrors(std::move(Err), [&](llvm::ErrorInfoBase &EIB) { + Message += EIB.message() + "; "; + }); + cling::errs() << "Dyld::ContainsSymbol: Failed to read object file " + << library_filename << " Errors: " << Message << "\n"; + } + return false; + } + + llvm::object::ObjectFile *BinObjFile = ObjF.get().getBinary(); + + uint32_t hashedMangle = GNUHash(mangledName); + // Check for the gnu.hash section if ELF. + // If the symbol doesn't exist, exit early. + if (BinObjFile->isELF() && !MayExistInElfObjectFile(BinObjFile, hashedMangle)) + return false; + + if (m_UseBloomFilter) { + // Use our bloom filters and create them if necessary. + if (Lib->isBloomFilterEmpty()) + BuildBloomFilter(const_cast(Lib), BinObjFile, + IgnoreSymbolFlags); + + // If the symbol does not exist, exit early. In case it may exist, iterate. 
+ if (!Lib->MayExistSymbol(hashedMangle)) { + if (DEBUG > 7) + cling::errs() << "Dyld::ContainsSymbol: BloomFilter: Skip symbol.\n"; + return false; + } + if (DEBUG > 7) + cling::errs() << "Dyld::ContainsSymbol: BloomFilter: Symbol May exist." + << " Search for it."; + } + + if (m_UseHashTable) { + bool result = Lib->ExistSymbol(mangledName); + if (DEBUG > 7) + cling::errs() << "Dyld::ContainsSymbol: HashTable: Symbol " + << (result ? "Exist" : "Not exist") << "\n"; + return result; + } + + // Symbol may exist. Iterate. + + // If no hash symbol then iterate to detect symbol + // We Iterate only if BloomFilter and/or SymbolHashTable are not supported. + for (const llvm::object::SymbolRef &S : BinObjFile->symbols()) { + uint32_t Flags = S.getFlags(); + // Do not insert in the table symbols flagged to ignore. + if (Flags & IgnoreSymbolFlags) + continue; + + // Note, we are at last resort and loading library based on a weak + // symbol is allowed. Otherwise, the JIT will issue an unresolved + // symbol error. + // + // There are other weak symbol kinds (marked as 'V') to denote + // typeinfo and vtables. It is unclear whether we should load such + // libraries or from which library we should resolve the symbol. + // We seem to not have a way to differentiate it from the symbol API. + + llvm::Expected SymNameErr = S.getName(); + if (!SymNameErr) { + cling::errs() << "Dyld::ContainsSymbol: Failed to read symbol " + << mangledName << "\n"; + continue; + } + + if (SymNameErr.get().empty()) + continue; + + if (SymNameErr.get() == mangledName) { + if (DEBUG > 1) { + cling::errs() << "Dyld::ContainsSymbol: Symbol " + << mangledName << " found in " + << library_filename << "\n"; + return true; + } + } + } + + if (!BinObjFile->isELF()) + return false; + + // ELF file format has .dynstr section for the dynamic symbol table. 
+ const auto *ElfObj = llvm::cast(BinObjFile); + + for (const llvm::object::SymbolRef &S : ElfObj->getDynamicSymbolIterators()) { + uint32_t Flags = S.getFlags(); + // DO NOT insert to table if symbol was undefined + if (Flags & llvm::object::SymbolRef::SF_Undefined) + continue; + + // Note, we are at last resort and loading library based on a weak + // symbol is allowed. Otherwise, the JIT will issue an unresolved + // symbol error. + // + // There are other weak symbol kinds (marked as 'V') to denote + // typeinfo and vtables. It is unclear whether we should load such + // libraries or from which library we should resolve the symbol. + // We seem to not have a way to differentiate it from the symbol API. + + llvm::Expected SymNameErr = S.getName(); + if (!SymNameErr) { + cling::errs() << "Dyld::ContainsSymbol: Failed to read symbol " + << mangledName << "\n"; + continue; + } + + if (SymNameErr.get().empty()) + continue; + + if (SymNameErr.get() == mangledName) + return true; + } + return false; +} + +std::string Dyld::searchLibrariesForSymbol(const std::string& mangledName, + bool searchSystem/* = true*/) { + assert(!llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(mangledName) && + "Library already loaded, please use dlsym!"); + assert(!mangledName.empty()); + using namespace llvm::sys::path; + using namespace llvm::sys::fs; + + if (m_FirstRun) { + ScanForLibraries(/* SearchSystemLibraries= */ false); + m_FirstRun = false; + } + + if (!m_QueriedLibraries.empty()) { + // Last call we were asked if a library contains a symbol. Usually, the + // caller wants to load this library. Check if was loaded and remove it + // from our lists of not-yet-loaded libs. 
+ + if (DEBUG > 7) { + cling::errs() << "Dyld::ResolveSymbol: m_QueriedLibraries:\n"; + size_t x = 0; + for (auto item : m_QueriedLibraries) { + cling::errs() << "Dyld::ResolveSymbol - [" << x++ << "]:" + << &item << ": " << item.m_Path << ", " + << item.m_LibName << "\n"; + } + } + + for (const LibraryPath& P : m_QueriedLibraries) { + const std::string LibName = P.GetFullName(); + if (!m_DynamicLibraryManager.isLibraryLoaded(LibName)) + continue; + + m_Libraries.UnregisterLib(P); + m_SysLibraries.UnregisterLib(P); + } + } + + // Iterate over files under this path. We want to get each ".so" files + for (const LibraryPath* P : m_Libraries.GetLibraries()) { + const std::string LibName = P->GetFullName(); + + if (ContainsSymbol(P, mangledName, /*ignore*/ + llvm::object::SymbolRef::SF_Undefined)) { + m_QueriedLibraries.push_back(*P); + return LibName; + } + } + + if (!searchSystem) + return ""; + + if (DEBUG > 7) + cling::errs() << "Dyld::ResolveSymbol: SearchSystem!\n"; + + // Lookup in non-system libraries failed. Expand the search to the system. 
+ if (m_FirstRunSysLib) { + ScanForLibraries(/* SearchSystemLibraries= */ true); + m_FirstRunSysLib = false; + } + + for (const LibraryPath* P : m_SysLibraries.GetLibraries()) { + const std::string LibName = P->GetFullName(); + if (ContainsSymbol(P, mangledName, /*ignore*/ + llvm::object::SymbolRef::SF_Undefined | + llvm::object::SymbolRef::SF_Weak)) { + m_QueriedLibraries.push_back(*P); + return LibName; + } + } + + if (DEBUG > 7) + cling::errs() << "Dyld::ResolveSymbol: Search found no match!\n"; + + /* + if (DEBUG > 7) { + cling::errs() << "Dyld::ResolveSymbol: Structs after ResolveSymbol:\n"); + + cling::errs() << "Dyld::ResolveSymbol - sPaths:\n"); + size_t x = 0; + for (const auto &item : sPaths.GetPaths()) + cling::errs() << "Dyld::ResolveSymbol << [" x++ << "]: " << item << "\n"; + + cling::errs() << "Dyld::ResolveSymbol - sLibs:\n"); + x = 0; + for (const auto &item : sLibraries.GetLibraries()) + cling::errs() << "Dyld::ResolveSymbol [" + << x++ << "]: " << item->Path << ", " + << item->LibName << "\n"; + + cling::errs() << "Dyld::ResolveSymbol - sSysLibs:"); + x = 0; + for (const auto &item : sSysLibraries.GetLibraries()) + cling::errs() << "Dyld::ResolveSymbol [" + << x++ << "]: " << item->Path << ", " + << item->LibName << "\n"; + + Info("Dyld::ResolveSymbol", "- sQueriedLibs:"); + x = 0; + for (const auto &item : sQueriedLibraries) + cling::errs() << "Dyld::ResolveSymbol [" + << x++ << "]: " << item->Path << ", " + << item->LibName << "\n"; + } + */ + + return ""; // Search found no match. +} +} // anon namespace + +// This function isn't referenced outside its translation unit, but it +// can't use the "static" keyword because its address is used for +// GetMainExecutable (since some platforms don't support taking the +// address of main, and some platforms can't implement GetMainExecutable +// without being given the address of a function in the main executable). 
+std::string GetExecutablePath() { + // This just needs to be some symbol in the binary; C++ doesn't + // allow taking the address of ::main however. + return cling::DynamicLibraryManager::getSymbolLocation(&GetExecutablePath); +} + +namespace cling { + void DynamicLibraryManager::initializeDyld( + std::function shouldPermanentlyIgnore) const { + assert(!s_IsDyldInitialized); + s_ShouldPermanentlyIgnoreCallback = shouldPermanentlyIgnore; + + std::string exeP = GetExecutablePath(); + auto ObjF = + cantFail(llvm::object::ObjectFile::createObjectFile(exeP)); + s_ExecutableFormat = ObjF.getBinary()->getFileFormatName(); + + s_IsDyldInitialized = true; + } + + std::string + DynamicLibraryManager::searchLibrariesForSymbol(const std::string& mangledName, + bool searchSystem/* = true*/) const { + assert(s_IsDyldInitialized && "Must call initialize dyld before!"); + static Dyld& dyld = Dyld::getInstance(*this); + return dyld.searchLibrariesForSymbol(mangledName, searchSystem); + } + + std::string DynamicLibraryManager::getSymbolLocation(void *func) { +#if defined(__CYGWIN__) && defined(__GNUC__) + return {}; +#elif defined(LLVM_ON_WIN32) + MEMORY_BASIC_INFORMATION mbi; + if (!VirtualQuery (func, &mbi, sizeof (mbi))) + return {}; + + HMODULE hMod = (HMODULE) mbi.AllocationBase; + char moduleName[MAX_PATH]; + + if (!GetModuleFileNameA (hMod, moduleName, sizeof (moduleName))) + return {}; + + return getRealPath(moduleName); +#else + // assume we have defined HAVE_DLFCN_H and HAVE_DLADDR + Dl_info info; + if (dladdr((void*)func, &info) == 0) { + // Not in a known shared library, let's give up + return {}; + } else { + if (strchr(info.dli_fname, '/')) + return getRealPath(info.dli_fname); + // Else absolute path. For all we know that's a binary. 
+ // Some people have dictionaries in binaries, this is how we find their + // path: (see also https://stackoverflow.com/a/1024937/6182509) +# if defined(__APPLE__) + char buf[PATH_MAX] = { 0 }; + uint32_t bufsize = sizeof(buf); + if (_NSGetExecutablePath(buf, &bufsize) >= 0) + return getRealPath(buf); + return getRealPath(info.dli_fname); +# elif defined(LLVM_ON_UNIX) + char buf[PATH_MAX] = { 0 }; + // Cross our fingers that /proc/self/exe exists. + if (readlink("/proc/self/exe", buf, sizeof(buf)) > 0) + return getRealPath(buf); + std::string pipeCmd = std::string("which \"") + info.dli_fname + "\""; + FILE* pipe = popen(pipeCmd.c_str(), "r"); + if (!pipe) + return getRealPath(info.dli_fname); + std::string result; + while (fgets(buf, sizeof(buf), pipe)) + result += buf; + + pclose(pipe); + return getRealPath(result); +# else +# error "Unsupported platform." +# endif + return {}; + } +#endif + } + +} // namespace cling