diff --git a/README/CREDITS b/README/CREDITS index fc6242da0ce0d..d5a9cc5ea63c3 100644 --- a/README/CREDITS +++ b/README/CREDITS @@ -776,6 +776,11 @@ N: Iulia Pasov E: iulia.pasov@gmail.com D: prototyping JavaScript graphics with d3.js +N: Alexander Penev +E: alexander_penev@yahoo.com +D: Dyld symbol resolution facilities in cling and TCling +D: runtime c++ modules on osx + N: Marc Paterno E: paterno@fnal.gov D: implement utility functions used by TGraphAsymmErrors::BayesDivide diff --git a/core/metacling/src/TCling.cxx b/core/metacling/src/TCling.cxx index 5fffaea73dd81..a8bcca6811b9f 100644 --- a/core/metacling/src/TCling.cxx +++ b/core/metacling/src/TCling.cxx @@ -653,11 +653,6 @@ extern "C" void TCling__SplitAclicMode(const char* fileName, string &mode, io = aclicio.Data(); fname = f.Data(); } -// Implemented in TClingCallbacks. -extern "C" void TCling__FindLoadedLibraries(std::vector> &sLibraries, - std::vector &sPaths, - cling::Interpreter &interpreter, bool searchSystem); - //______________________________________________________________________________ // // @@ -1531,9 +1526,16 @@ TCling::TCling(const char *name, const char *title, const char* const argv[]) fInterpreter->setCallbacks(std::move(clingCallbacks)); if (!fromRootCling) { + cling::DynamicLibraryManager& DLM = *fInterpreter->getDynamicLibraryManager(); // Make sure cling looks into ROOT's libdir, even if not part of LD_LIBRARY_PATH // e.g. because of an RPATH build. - fInterpreter->getDynamicLibraryManager()->addSearchPath(TROOT::GetLibDir().Data()); + DLM.addSearchPath(TROOT::GetLibDir().Data()); + auto ShouldPermanentlyIgnore = [](llvm::StringRef FileName) -> bool{ + llvm::StringRef stem = llvm::sys::path::stem(FileName); + return stem.startswith("libNew") || stem.startswith("libcppyy_backend"); + }; + // Initialize the dyld for the llvmLazyFunctionCreator. 
+ DLM.initializeDyld(ShouldPermanentlyIgnore); } } @@ -1584,69 +1586,6 @@ void TCling::ShutDown() ResetGlobals(); } -//////////////////////////////////////////////////////////////////////////////// -/// Wrapper around dladdr (and friends) - -static std::string FindLibraryName(void (*func)()) -{ -#if defined(__CYGWIN__) && defined(__GNUC__) - return {}; -#elif defined(G__WIN32) - MEMORY_BASIC_INFORMATION mbi; - if (!VirtualQuery (func, &mbi, sizeof (mbi))) - { - return {}; - } - - HMODULE hMod = (HMODULE) mbi.AllocationBase; - char moduleName[MAX_PATH]; - - if (!GetModuleFileNameA (hMod, moduleName, sizeof (moduleName))) - { - return {}; - } - return ROOT::TMetaUtils::GetRealPath(moduleName); -#else - Dl_info info; - if (dladdr((void*)func, &info) == 0) { - // Not in a known shared library, let's give up - return {}; - } else { - if (strchr(info.dli_fname, '/')) - return ROOT::TMetaUtils::GetRealPath(info.dli_fname); - // Else absolute path. For all we know that's a binary. - // Some people have dictionaries in binaries, this is how we find their path: - // (see also https://stackoverflow.com/a/1024937/6182509) -# if defined(R__MACOSX) - char buf[PATH_MAX] = { 0 }; - uint32_t bufsize = sizeof(buf); - if (_NSGetExecutablePath(buf, &bufsize) >= 0) - return ROOT::TMetaUtils::GetRealPath(buf); - return ROOT::TMetaUtils::GetRealPath(info.dli_fname); -# elif defined(R__UNIX) - char buf[PATH_MAX] = { 0 }; - // Cross our fingers that /proc/self/exe exists. - if (readlink("/proc/self/exe", buf, sizeof(buf)) > 0) - return ROOT::TMetaUtils::GetRealPath(buf); - std::string pipeCmd = std::string("which \"") + info.dli_fname + "\""; - FILE* pipe = popen(pipeCmd.c_str(), "r"); - if (!pipe) - return ROOT::TMetaUtils::GetRealPath(info.dli_fname); - std::string result; - while (fgets(buf, sizeof(buf), pipe)) { - result += buf; - } - pclose(pipe); - return ROOT::TMetaUtils::GetRealPath(result); -# else -# error "Unsupported platform." 
-# endif - return {}; - } -#endif - -} - //////////////////////////////////////////////////////////////////////////////// /// Helper to initialize TVirtualStreamerInfo's factor early. /// Use static initialization to insure only one TStreamerInfo is created. @@ -2068,7 +2007,7 @@ void TCling::RegisterModule(const char* modulename, if (payloadCode) code += payloadCode; - std::string dyLibName = FindLibraryName(triggerFunc); + std::string dyLibName = cling::DynamicLibraryManager::getSymbolLocation(triggerFunc); assert(!llvm::sys::fs::is_symlink_file(dyLibName)); if (dyLibName.empty()) { @@ -6243,260 +6182,42 @@ bool TCling::LibraryLoadingFailed(const std::string& errmessage, const std::stri return false; } -// This is a GNU implementation of hash used in bloom filter! -static uint32_t GNUHash(StringRef S) { - uint32_t H = 5381; - for (uint8_t C : S) - H = (H << 5) + H + C; - return H; -} - -static StringRef GetGnuHashSection(llvm::object::ObjectFile *file) { - for (auto S : file->sections()) { - StringRef name; - S.getName(name); - if (name == ".gnu.hash") { - StringRef content; - S.getContents(content); - return content; - } - } - return ""; -} - -/// Bloom filter in a stohastic data structure which can tell us if a symbol -/// name does not exist in a library with 100% certainty. If it tells us it exists -/// this may not be true: https://blogs.oracle.com/solaris/gnu-hash-elf-sections-v2 -/// -/// ELF has this optimization in the new linkers by default, It is stored in the -/// gnu.hash section of the object file. -/// -///\returns true true if the symbol may be in the library. -static bool MayExistInObjectFile(llvm::object::ObjectFile *soFile, uint32_t hash) { - if (!soFile->isELF()) - return true; - - // LLVM9: soFile->makeTriple().is64Bit() - const int bits = 8 * soFile->getBytesInAddress(); - - StringRef contents = GetGnuHashSection(soFile); - if (contents.size() < 16) - // We need to search if the library doesn't have .gnu.hash section! 
- return true; - const char* hashContent = contents.data(); - - // See https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/ for .gnu.hash table layout. - uint32_t maskWords = *reinterpret_cast(hashContent + 8); - uint32_t shift2 = *reinterpret_cast(hashContent + 12); - uint32_t hash2 = hash >> shift2; - uint32_t n = (hash / bits) % maskWords; - - const char *bloomfilter = hashContent + 16; - const char *hash_pos = bloomfilter + n*(bits/8); // * (Bits / 8) - uint64_t word = *reinterpret_cast(hash_pos); - uint64_t bitmask = ( (1ULL << (hash % bits)) | (1ULL << (hash2 % bits))); - return (bitmask & word) == bitmask; -} - -/// Looks up symbols from a an object file, representing the library. -///\returns true on success. -static bool FindSymbol(const std::string &library_filename, - const std::string &mangled_name, unsigned IgnoreSymbolFlags = 0) -{ - auto ObjF = llvm::object::ObjectFile::createObjectFile(ROOT::TMetaUtils::GetRealPath(library_filename)); - if (!ObjF) { - if (gDebug > 1) - Warning("TCling__FindSymbol", "Failed to read object file %s", library_filename.c_str()); - return false; - } - - llvm::object::ObjectFile *BinObjFile = ObjF.get().getBinary(); - - uint32_t hashedMangle = GNUHash(mangled_name); - // If the symbol does not exist, exit early. In case it may exist, iterate. - if (!MayExistInObjectFile(BinObjFile, hashedMangle)) - return false; - - for (const auto &S : BinObjFile->symbols()) { - uint32_t Flags = S.getFlags(); - // Do not insert in the table symbols flagged to ignore. - if (Flags & IgnoreSymbolFlags) - continue; - - // Note, we are at last resort and loading library based on a weak - // symbol is allowed. Otherwise, the JIT will issue an unresolved - // symbol error. - // - // There are other weak symbol kinds (marked as 'V') to denote - // typeinfo and vtables. It is unclear whether we should load such - // libraries or from which library we should resolve the symbol. 
- // We seem to not have a way to differentiate it from the symbol API. - - llvm::Expected SymNameErr = S.getName(); - if (!SymNameErr) { - Warning("TCling__FindSymbol", "Failed to read symbol %s", mangled_name.c_str()); - continue; - } - - if (SymNameErr.get() == mangled_name) { - if (gDebug > 1) - Info("TCling__FindSymbol", "Symbol %s found in %s\n", - mangled_name.c_str(), library_filename.c_str()); - return true; - } - } - - if (!BinObjFile->isELF()) - return false; - - // ELF file format has .dynstr section for the dynamic symbol table. - const auto *ElfObj = cast(BinObjFile); - - for (const auto &S : ElfObj->getDynamicSymbolIterators()) { - uint32_t Flags = S.getFlags(); - // DO NOT insert to table if symbol was undefined - if (Flags & llvm::object::SymbolRef::SF_Undefined) - continue; - - // Note, we are at last resort and loading library based on a weak - // symbol is allowed. Otherwise, the JIT will issue an unresolved - // symbol error. - // - // There are other weak symbol kinds (marked as 'V') to denote - // typeinfo and vtables. It is unclear whether we should load such - // libraries or from which library we should resolve the symbol. - // We seem to not have a way to differentiate it from the symbol API. 
- - llvm::Expected SymNameErr = S.getName(); - if (!SymNameErr) { - Warning("TCling__FindSymbol", "Failed to read symbol %s", mangled_name.c_str()); - continue; - } - - if (SymNameErr.get() == mangled_name) - return true; - } - - return false; -} - -static std::string ResolveSymbol(const std::string& mangled_name, - cling::Interpreter *interp, - bool searchSystem = true) { - assert(!mangled_name.empty()); - using namespace llvm::sys::path; - using namespace llvm::sys::fs; - +static void* LazyFunctionCreatorAutoloadForModule(const std::string &mangled_name, + const cling::DynamicLibraryManager &DLM) { R__LOCKGUARD(gInterpreterMutex); - static bool sFirstRun = true; - static bool sFirstSystemLibrary = true; - // LibraryPath contains a pair offset to the canonical dirname (stored as - // sPaths[i]) and a filename. For example, `/home/foo/root/lib/libTMVA.so`, - // the .first will contain an index in sPaths where `/home/foo/root/lib/` - // will be stored and .second `libTMVA.so`. - // This approach reduces the duplicate paths as at one location there may be - // plenty of libraries. - using LibraryPath = std::pair; - using LibraryPaths = std::vector; - using BasePaths = std::vector; - static LibraryPaths sLibraries; - static BasePaths sPaths; - static LibraryPaths sQueriedLibraries; - - // For system header AutoLoading - static LibraryPaths sSysLibraries; - - if (sFirstRun) { - TCling__FindLoadedLibraries(sLibraries, sPaths, *interp, /* searchSystem */ false); - sFirstRun = false; - } - - auto GetLibFileName = [](const LibraryPath &P, const BasePaths &BaseP) { - llvm::SmallString<512> Vec(BaseP[P.first]); - llvm::sys::path::append(Vec, StringRef(P.second)); - return Vec.str().str(); + auto LibLoader = [](const std::string& LibName) -> bool { + if (gSystem->Load(LibName.c_str(), "", false) < 0) { + Error("TCling__LazyFunctionCreatorAutoloadForModule", + "Failed to load library %s", LibName.c_str()); + return false; + } + return true; //success. 
}; - if (!sQueriedLibraries.empty()) { - // Last call we were asked if a library contains a symbol. Usually, the - // caller wants to load this library. Check if was loaded and remove it - // from our lists of not-yet-loaded libs. - for (const LibraryPath &P : sQueriedLibraries) { - const std::string LibName = GetLibFileName(P, sPaths); - if (!gCling->IsLibraryLoaded(LibName.c_str())) - continue; - - sLibraries.erase(std::remove(sLibraries.begin(), sLibraries.end(), P), sLibraries.end()); - if (!sSysLibraries.empty()) - sSysLibraries.erase(std::remove(sSysLibraries.begin(), sSysLibraries.end(), P), sSysLibraries.end()); - } - } - - if (sFirstRun) { - TCling__FindLoadedLibraries(sLibraries, sPaths, *interp, /* searchSystem */ false); - sFirstRun = false; - } - - // Iterate over files under this path. We want to get each ".so" files - for (const LibraryPath &P : sLibraries) { - const std::string LibName = GetLibFileName(P, sPaths); - - // FIXME: We should also iterate over the dynamic symbols for ROOT - // libraries. However, it seems to be redundant for the moment as we do - // not strictly require symbols from those sections. Enable after checking - // performance! - if (FindSymbol(LibName, mangled_name, /*ignore*/ - llvm::object::SymbolRef::SF_Undefined)) { - sQueriedLibraries.push_back(P); - return LibName; - } - } - - if (!searchSystem) - return ""; - - // Lookup in non-system libraries failed. Expand the search to the system. - if (sFirstSystemLibrary) { - TCling__FindLoadedLibraries(sSysLibraries, sPaths, *interp, /* searchSystem */ true); - sFirstSystemLibrary = false; - } - - for (const LibraryPath &P : sSysLibraries) { - const std::string LibName = GetLibFileName(P, sPaths); - - if (FindSymbol(LibName, mangled_name, /*ignore*/ - llvm::object::SymbolRef::SF_Undefined | - llvm::object::SymbolRef::SF_Weak)) { - sQueriedLibraries.push_back(P); - return LibName; - } - } - - return ""; // Search found no match. 
-} - -static void* LazyFunctionCreatorAutoloadForModule(const std::string &mangled_name, - cling::Interpreter *interp) { -// The JIT gives us a mangled name which has only one leading underscore on -// all platforms, for instance _ZN8TRandom34RndmEv. However, on OSX the -// linker stores this symbol as __ZN8TRandom34RndmEv (adding an extra _). - std::string maybe_prefixed_mangled_name = mangled_name; #ifdef R__MACOSX + // The JIT gives us a mangled name which has only one leading underscore on + // all platforms, for instance _ZN8TRandom34RndmEv. However, on OSX the + // linker stores this symbol as __ZN8TRandom34RndmEv (adding an extra _). assert(!llvm::StringRef(mangled_name).startswith("__") && "Already added!"); - maybe_prefixed_mangled_name = "_" + maybe_prefixed_mangled_name; -#endif + std::string libName = DLM.searchLibrariesForSymbol('_' + mangled_name, + /*searchSystem=*/ true); +#else + std::string libName = DLM.searchLibrariesForSymbol(mangled_name, + /*searchSystem=*/ true); +#endif //R__MACOSX - std::string LibName = ResolveSymbol(maybe_prefixed_mangled_name, interp); - if (LibName.empty()) + assert(!llvm::StringRef(libName).startswith("libNew") && + "We must not resolve symbols from libNew!"); + + if (libName.empty()) return nullptr; - if (gSystem->Load(LibName.c_str(), "", false) < 0) - Error("TCling__LazyFunctionCreatorAutoloadForModule", - "Failed to load library %s", LibName.c_str()); + if (!LibLoader(libName)) + return nullptr; + + return llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(mangled_name); - void* addr = llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(mangled_name.c_str()); - return addr; } //////////////////////////////////////////////////////////////////////////////// @@ -6504,7 +6225,8 @@ static void* LazyFunctionCreatorAutoloadForModule(const std::string &mangled_nam void* TCling::LazyFunctionCreatorAutoload(const std::string& mangled_name) { if (fCxxModulesEnabled) - return 
LazyFunctionCreatorAutoloadForModule(mangled_name, GetInterpreterImpl()); + return LazyFunctionCreatorAutoloadForModule(mangled_name, + *GetInterpreterImpl()->getDynamicLibraryManager()); // First see whether the symbol is in the library that we are currently // loading. It will have access to the symbols of its dependent libraries, @@ -7157,7 +6879,7 @@ static std::string GetSharedLibImmediateDepsSlow(std::string lib, if (BinObjFile->isELF()) { // Skip the symbols which are part of the C/C++ runtime and have a // fixed library version. See binutils ld VERSION. Those reside in - // 'system' libraries, which we avoid in ResolveSymbol. + // 'system' libraries, which we avoid in FindLibraryForSymbol. if (SymName.contains("@@GLIBCXX") || SymName.contains("@@CXXABI") || SymName.contains("@@GLIBC") || SymName.contains("@@GCC")) continue; @@ -7177,7 +6899,8 @@ static std::string GetSharedLibImmediateDepsSlow(std::string lib, if (skipLoadedLibs && llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(SymName)) continue; - std::string found = ResolveSymbol(SymName, interp, /*searchSystem*/false); + R__LOCKGUARD(gInterpreterMutex); + std::string found = interp->getDynamicLibraryManager()->searchLibrariesForSymbol(SymName, /*searchSystem*/false); // The expected output is just filename without the full path, which // is not very accurate, because our Dyld implementation might find // a match in location a/b/c.so and if we return just c.so ROOT might diff --git a/core/metacling/src/TClingCallbacks.cxx b/core/metacling/src/TClingCallbacks.cxx index 1a5a4247dca13..74101dc581af8 100644 --- a/core/metacling/src/TClingCallbacks.cxx +++ b/core/metacling/src/TClingCallbacks.cxx @@ -34,6 +34,8 @@ #include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/ObjectFile.h" + +#include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" @@ -74,9 +76,6 @@ extern "C" { void TCling__RestoreInterpreterMutex(void *state); void 
*TCling__LockCompilationDuringUserCodeExecution(); void TCling__UnlockCompilationDuringUserCodeExecution(void *state); - void TCling__FindLoadedLibraries(std::vector> &sLibraries, - std::vector &sPaths, - cling::Interpreter &interpreter, bool searchSystem); } TClingCallbacks::TClingCallbacks(cling::Interpreter *interp, bool hasCodeGen) : InterpreterCallbacks(interp) @@ -921,103 +920,3 @@ void TClingCallbacks::UnlockCompilationDuringUserCodeExecution(void *StateInfo) { TCling__UnlockCompilationDuringUserCodeExecution(StateInfo); } - -static bool shouldIgnore(const std::string& FileName, - const cling::DynamicLibraryManager& dyLibManager) { - if (llvm::sys::fs::is_directory(FileName)) - return true; - - if (!cling::DynamicLibraryManager::isSharedLibrary(FileName)) - return true; - - // No need to check linked libraries, as this function is only invoked - // for symbols that cannot be found (neither by dlsym nor in the JIT). - if (dyLibManager.isLibraryLoaded(FileName.c_str())) - return true; - - - auto ObjF = llvm::object::ObjectFile::createObjectFile(FileName); - if (!ObjF) { - if (gDebug > 1) - ROOT::TMetaUtils::Warning("[DyLD]", "Failed to read object file %s", - FileName.c_str()); - return true; - } - - llvm::object::ObjectFile *file = ObjF.get().getBinary(); - - if (isa(*file)) { - for (auto S : file->sections()) { - StringRef name; - S.getName(name); - if (name == ".text") { - // Check if the library has only debug symbols, usually when - // stripped with objcopy --only-keep-debug. This check is done by - // reading the manual of objcopy and inspection of stripped with - // objcopy libraries. - auto SecRef = static_cast(S); - if (SecRef.getType() == llvm::ELF::SHT_NOBITS) - return true; - - return (SecRef.getFlags() & llvm::ELF::SHF_ALLOC) == 0; - } - } - return true; - } - //FIXME: Handle osx using isStripped after upgrading to llvm9. 
- - llvm::StringRef fileStem = llvm::sys::path::stem(FileName); - return fileStem.startswith("libNew") || fileStem.startswith("libcppyy_backend"); -} - -static void SearchAndAddPath(const std::string& Path, - std::vector> &sLibraries, std::vector &sPaths, - std::unordered_set& alreadyLookedPath, cling::DynamicLibraryManager* dyLibManager) -{ - // Already searched? - auto it = alreadyLookedPath.insert(Path); - if (!it.second) - return; - StringRef DirPath(Path); - if (!llvm::sys::fs::is_directory(DirPath)) - return; - - bool flag = false; - std::error_code EC; - for (llvm::sys::fs::directory_iterator DirIt(DirPath, EC), DirEnd; - DirIt != DirEnd && !EC; DirIt.increment(EC)) { - - std::string FileName(DirIt->path()); - if (shouldIgnore(FileName, *dyLibManager)) - continue; - - sLibraries.push_back(std::make_pair(sPaths.size(), llvm::sys::path::filename(FileName))); - flag = true; - } - - if (flag) - sPaths.push_back(Path); -} - -// Extracted here to circumvent ODR clash between -// std::Sp_counted_ptr_inplace, (_gnu_cxx::_Lock_policy)2>::_M_get_deleter(std::type_info const&) -// coming from a no-rtti and a rtti build in libstdc++ from GCC >= 8.1. -// In its function body, rtti uses `arg0 == typeid(...)` protected by #ifdef __cpp_rtti. Depending -// on which symbol (with or without rtti) the linker picks up, the argument `arg0` is a valid -// type_info - or not, in which case this comparison crashes. -// Circumvent this by removing the rtti-use of this function: -void TCling__FindLoadedLibraries(std::vector> &sLibraries, - std::vector &sPaths, - cling::Interpreter &interpreter, bool searchSystem) -{ - // Store the information of path so that we don't have to iterate over the same path again and again. 
- static std::unordered_set alreadyLookedPath; - cling::DynamicLibraryManager* dyLibManager = interpreter.getDynamicLibraryManager(); - - const auto &searchPaths = dyLibManager->getSearchPath(); - for (const cling::DynamicLibraryManager::SearchPathInfo &Info : searchPaths) { - if (!Info.IsUser && !searchSystem) - continue; - SearchAndAddPath(Info.Path, sLibraries, sPaths, alreadyLookedPath, dyLibManager); - } -} diff --git a/interpreter/cling/include/cling/Interpreter/DynamicLibraryManager.h b/interpreter/cling/include/cling/Interpreter/DynamicLibraryManager.h index 4821fe60f7063..a668a9c19edfc 100644 --- a/interpreter/cling/include/cling/Interpreter/DynamicLibraryManager.h +++ b/interpreter/cling/include/cling/Interpreter/DynamicLibraryManager.h @@ -11,6 +11,7 @@ #define CLING_DYNAMIC_LIBRARY_MANAGER_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" @@ -43,7 +44,12 @@ namespace cling { /// True if the Path is on the LD_LIBRARY_PATH. /// bool IsUser; + + bool operator==(const SearchPathInfo& Other) const { + return IsUser == Other.IsUser && Path == Other.Path; + } }; + using SearchPathInfos = llvm::SmallVector; private: typedef const void* DyLibHandle; typedef llvm::DenseMap DyLibs; @@ -58,7 +64,7 @@ namespace cling { ///\brief System's include path, get initialized at construction time. /// - llvm::SmallVector m_SearchPaths; + SearchPathInfos m_SearchPaths; InterpreterCallbacks* m_Callbacks; @@ -80,6 +86,11 @@ namespace cling { ///\returns the canonical path to the file or empty string if not found /// std::string lookupLibMaybeAddExt(llvm::StringRef filename) const; + + /// On success, returns the full path to the shared object that holds the + /// symbol pointed to by func.
+ /// + static std::string getSymbolLocation(void* func); public: DynamicLibraryManager(const InvocationOptions& Opts); ~DynamicLibraryManager(); @@ -91,7 +102,7 @@ namespace cling { /// ///\returns System include paths. /// - const llvm::SmallVectorImpl& getSearchPath() { + const SearchPathInfos& getSearchPaths() const { return m_SearchPaths; } @@ -128,6 +139,26 @@ namespace cling { /// bool isLibraryLoaded(llvm::StringRef fullPath) const; + /// Initialize the dyld. + /// + ///\param [in] shouldPermanentlyIgnore - a callback deciding if a library + /// should be ignored from the result set. Useful for ignoring + /// dangerous libraries such as the ones overriding malloc. + /// + void + initializeDyld(std::function shouldPermanentlyIgnore) + const; + + /// Find the first not-yet-loaded shared object that contains the symbol + /// + ///\param[in] mangledName - the mangled name to look for. + ///\param[in] searchSystem - whether to descend into system libraries. + /// + ///\returns the library name if found, and an empty string otherwise. + /// + std::string searchLibrariesForSymbol(const std::string& mangledName, + bool searchSystem = true) const; + ///\brief Explicitly tell the execution engine to use symbols from /// a shared library that would otherwise not be used for symbol /// resolution, e.g. because it was dlopened with RTLD_LOCAL. @@ -145,6 +176,15 @@ namespace cling { /// is a library but of incompatible file format. /// static bool isSharedLibrary(llvm::StringRef libFullPath, bool* exists = 0); + + /// On success, returns the full path to the shared object that holds the + /// symbol pointed to by func.
+ /// + template + static std::string getSymbolLocation(T func) { + static_assert(std::is_pointer::value, "Must be a function pointer!"); + return getSymbolLocation(reinterpret_cast(func)); + } }; } // end namespace cling #endif // CLING_DYNAMIC_LIBRARY_MANAGER_H diff --git a/interpreter/cling/lib/Interpreter/CMakeLists.txt b/interpreter/cling/lib/Interpreter/CMakeLists.txt index 9d21ed460ccc6..c68ca4d3ed671 100644 --- a/interpreter/cling/lib/Interpreter/CMakeLists.txt +++ b/interpreter/cling/lib/Interpreter/CMakeLists.txt @@ -76,6 +76,7 @@ add_cling_library(clingInterpreter OBJECT DeclUnloader.cpp DeviceKernelInliner.cpp DynamicLibraryManager.cpp + DynamicLibraryManagerSymbol.cpp DynamicLookup.cpp DynamicExprInfo.cpp Exception.cpp diff --git a/interpreter/cling/lib/Interpreter/DynamicLibraryManagerSymbol.cpp b/interpreter/cling/lib/Interpreter/DynamicLibraryManagerSymbol.cpp new file mode 100644 index 0000000000000..3192d96c889e2 --- /dev/null +++ b/interpreter/cling/lib/Interpreter/DynamicLibraryManagerSymbol.cpp @@ -0,0 +1,961 @@ +//------------------------------------------------------------------------------ +// CLING - the C++ LLVM-based InterpreterG :) +// author: Vassil Vassilev +// author: Alexander Penev +// +// This file is dual-licensed: you can choose to license it under the University +// of Illinois Open Source License or the GNU Lesser General Public License. See +// LICENSE.TXT for details. 
+//------------------------------------------------------------------------------ + +#include "cling/Interpreter/DynamicLibraryManager.h" +#include "cling/Utils/Output.h" + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSet.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/Path.h" + +#include +#include +#include +#include +#include + +#include "llvm/Config/config.h" // Get configuration settings + +#if defined(HAVE_DLFCN_H) && defined(HAVE_DLOPEN) +#include +#endif + +#ifdef HAVE_UNISTD_H +#include +#endif // HAVE_UNISTD_H + +#ifdef __APPLE__ +#include +#endif // __APPLE__ + +#ifdef LLVM_ON_WIN32 +#include // For GetModuleFileNameA +#include // For VirtualQuery +#endif + +// FIXME: Implement debugging output stream in cling. +constexpr unsigned DEBUG = 0; + +namespace { + +using BasePath = std::string; + +// This is a GNU implementation of hash used in bloom filter! +static uint32_t GNUHash(llvm::StringRef S) { + uint32_t H = 5381; + for (uint8_t C : S) + H = (H << 5) + H + C; + return H; +} + +constexpr uint32_t log2u(std::uint32_t n) { + return (n > 1) ? 
1 + log2u(n >> 1) : 0; +} + +struct BloomFilter { + + // https://hur.st/bloomfilter + // + // n = ceil(m / (-k / log(1 - exp(log(p) / k)))) + // p = pow(1 - exp(-k / (m / n)), k) + // m = ceil((n * log(p)) / log(1 / pow(2, log(2)))); + // k = round((m / n) * log(2)); + // + // n = symbolsCount + // p = 0.02 + // k = 2 (k1=GNUHash and k2=GNUHash >> bloomShift) + // m = ceil((symbolsCount * log(p)) / log(1 / pow(2, log(2)))); + // bloomShift = min(5 for bits=32 or 6 for bits=64, log2(symbolsCount)) + // bloomSize = ceil((-1.44 * n * log2f(p)) / bits) + + const int m_Bits = 8 * sizeof(uint64_t); + const float m_P = 0.02; + + bool m_IsInitialized = false; + uint32_t m_SymbolsCount = 0; + uint32_t m_BloomSize = 0; + uint32_t m_BloomShift = 0; + std::vector m_BloomTable; + + bool TestHash(uint32_t hash) const { + // This function is superhot. No branches here, breaks inlining and makes + // overall performance around 4x slower. + assert(m_IsInitialized && "Not yet initialized!"); + uint32_t hash2 = hash >> m_BloomShift; + uint32_t n = (hash >> log2u(m_Bits)) % m_BloomSize; + uint64_t mask = ((1ULL << (hash % m_Bits)) | (1ULL << (hash2 % m_Bits))); + return (mask & m_BloomTable[n]) == mask; + } + + void AddHash(uint32_t hash) { + assert(m_IsInitialized && "Not yet initialized!"); + uint32_t hash2 = hash >> m_BloomShift; + uint32_t n = (hash >> log2u(m_Bits)) % m_BloomSize; + uint64_t mask = ((1ULL << (hash % m_Bits)) | (1ULL << (hash2 % m_Bits))); + m_BloomTable[n] |= mask; + } + + void ResizeTable(uint32_t newSymbolsCount) { + assert(m_SymbolsCount == 0 && "Not supported yet!"); + m_SymbolsCount = newSymbolsCount; + m_BloomSize = ceil((-1.44f * m_SymbolsCount * log2f(m_P)) / m_Bits); + m_BloomShift = std::min(6u, log2u(m_SymbolsCount)); + m_BloomTable.resize(m_BloomSize); + } + +}; + + +/// An efficient representation of a full path to a library which does not +/// duplicate common path patterns reducing the overall memory footprint. 
+/// +/// For example, for `/home/.../lib/libA.so`, m_Path will refer to +/// `/home/.../lib/` +/// and m_LibName will hold `libA.so`. +/// This approach reduces the duplicate paths as at one location there may be +/// plenty of libraries. +struct LibraryPath { + const BasePath& m_Path; + std::string m_LibName; + BloomFilter m_Filter; + llvm::StringSet<> m_Symbols; + + LibraryPath(const BasePath& Path, const std::string& LibName) + : m_Path(Path), m_LibName(LibName) { } + + bool operator==(const LibraryPath &other) const { + return (&m_Path == &other.m_Path || m_Path == other.m_Path) && + m_LibName == other.m_LibName; + } + + std::string GetFullName() const { + llvm::SmallString<512> Vec(m_Path); + llvm::sys::path::append(Vec, llvm::StringRef(m_LibName)); + return Vec.str().str(); + } + + void AddBloom(llvm::StringRef symbol) { + m_Filter.AddHash(GNUHash(symbol)); + } + + llvm::StringRef AddSymbol(const std::string& symbol) { + auto it = m_Symbols.insert(symbol); + return it.first->getKey(); + } + + bool isBloomFilterEmpty() const { + assert(m_Filter.m_IsInitialized && "Bloom filter not initialized!"); + return m_Filter.m_SymbolsCount == 0; + } + + void InitializeBloomFilter(uint32_t newSymbolsCount) { + // Do not call isBloomFilterEmpty() here: it asserts an initialized filter. + assert(!m_Filter.m_IsInitialized && m_Filter.m_SymbolsCount == 0 && + "Cannot re-initialize non-empty filter!"); + m_Filter.m_IsInitialized = true; + m_Filter.ResizeTable(newSymbolsCount); + } + + bool MayExistSymbol(uint32_t hash) const { + // The library had no symbols and the bloom filter is empty. + if (isBloomFilterEmpty()) + return false; + + return m_Filter.TestHash(hash); + } + + bool ExistSymbol(llvm::StringRef symbol) const { + return m_Symbols.find(symbol) != m_Symbols.end(); + } +}; + + +/// A helper class keeping track of loaded libraries. It implements a fast +/// search O(1) while keeping deterministic iterability in a memory efficient +/// way.
The underlying set uses a custom hasher for better efficiency given the +/// specific problem where the library names (m_LibName) are relatively short +/// strings and the base paths (m_Path) are repetitive long strings. +class LibraryPaths { + struct LibraryPathHashFn { + size_t operator()(const LibraryPath& item) const { + return std::hash()(item.m_Path.length()) ^ + std::hash()(item.m_LibName); + } + }; + + std::vector m_Libs; + std::unordered_set m_LibsH; +public: + bool HasRegisteredLib(const LibraryPath& Lib) const { + return m_LibsH.count(Lib); + } + + void RegisterLib(const LibraryPath& Lib) { + auto it = m_LibsH.insert(Lib); + assert(it.second && "Already registered!"); + m_Libs.push_back(&*it.first); + } + + void UnregisterLib(const LibraryPath& Lib) { + auto found = m_LibsH.find(Lib); + if (found == m_LibsH.end()) + return; + + m_Libs.erase(std::find(m_Libs.begin(), m_Libs.end(), &*found)); + m_LibsH.erase(found); + } + + size_t size() const { + assert(m_Libs.size() == m_LibsH.size()); + return m_Libs.size(); + } + + const std::vector& GetLibraries() const { + return m_Libs; + } +}; + +class Dyld { + + struct BasePathHashFunction { + size_t operator()(const BasePath& item) const { + return std::hash()(item); + } + }; + + struct BasePathEqFunction { + size_t operator()(const BasePath& l, const BasePath& r) const { + return &l == &r || l == r; + } + }; + /// A memory efficient llvm::VectorSet. The class provides O(1) search + /// complexity. It is tuned to compare BasePaths first by checking the + /// address and then the representation which models the base path reuse. 
+ class BasePaths { + std::unordered_set m_Paths; + + public: + const BasePath& RegisterBasePath(const std::string& Path, + bool* WasInserted = nullptr) { + auto it = m_Paths.insert(Path); + if (WasInserted) + *WasInserted = it.second; + + return *it.first; + } + + bool Contains (const std::string& Path) { + return m_Paths.count(Path); + } + }; + + bool m_FirstRun = true; + bool m_FirstRunSysLib = true; + bool m_UseBloomFilter = true; + bool m_UseHashTable = true; + + const cling::DynamicLibraryManager& m_DynamicLibraryManager; + + /// The base paths, e.g. for `/home/.../lib/libA.so`, + /// m_BasePaths will contain `/home/.../lib/` + BasePaths m_BasePaths; + + LibraryPaths m_Libraries; + LibraryPaths m_SysLibraries; + /// Contains a set of libraries which we gave to the user via ResolveSymbol + /// call and next time we should check if the user loaded them to avoid + /// useless iterations. + std::vector m_QueriedLibraries; + + /// Scan for shared objects which are not yet loaded. They are our symbol + /// resolution candidate sources. + /// NOTE: We only scan not loaded shared objects. + /// \param[in] searchSystemLibraries - whether to descend into standard system + /// locations for shared objects. + void ScanForLibraries(bool searchSystemLibraries = false); + + /// Builds a bloom filter lookup optimization. + void BuildBloomFilter(LibraryPath* Lib, llvm::object::ObjectFile *BinObjFile, + unsigned IgnoreSymbolFlags = 0) const; + + + /// Looks up symbols from an object file, representing the library. + ///\param[in] Lib - full path to the library. + ///\param[in] mangledName - the mangled name to look for. + ///\param[in] IgnoreSymbolFlags - The symbols to ignore upon a match. + ///\returns true on success.
+ bool ContainsSymbol(const LibraryPath* Lib, const std::string &mangledName, + unsigned IgnoreSymbolFlags = 0) const; + +protected: + Dyld(const cling::DynamicLibraryManager &DLM) + : m_DynamicLibraryManager(DLM) { } + + ~Dyld() = default; + +public: + static Dyld& getInstance(const cling::DynamicLibraryManager &DLM) { + static Dyld instance(DLM); + +#ifndef NDEBUG + auto &NewSearchPaths = DLM.getSearchPaths(); + auto &OldSearchPaths = instance.m_DynamicLibraryManager.getSearchPaths(); + // FIXME: Move the Dyld logic to the cling::DynamicLibraryManager itself! + assert(std::equal(OldSearchPaths.begin(), OldSearchPaths.end(), + NewSearchPaths.begin()) && "Path was added/removed!"); +#endif + + return instance; + } + + // delete copy and move constructors and assign operators + Dyld(Dyld const&) = delete; + Dyld(Dyld&&) = delete; + Dyld& operator=(Dyld const&) = delete; + Dyld& operator=(Dyld &&) = delete; + + std::string searchLibrariesForSymbol(const std::string& mangledName, + bool searchSystem); +}; + + +static bool s_IsDyldInitialized = false; +static std::function s_ShouldPermanentlyIgnoreCallback; + + +static std::string getRealPath(llvm::StringRef path) { + llvm::SmallString<512> realPath; + llvm::sys::fs::real_path(path, realPath, /*expandTilde*/true); + return realPath.str().str(); +} + +static llvm::StringRef s_ExecutableFormat; + +static bool shouldPermanentlyIgnore(const std::string& FileName, + const cling::DynamicLibraryManager& dyLibManager) { + assert(FileName == getRealPath(FileName)); + assert(!s_ExecutableFormat.empty() && "Failed to find the object format!"); + + if (llvm::sys::fs::is_directory(FileName)) + return true; + + if (!cling::DynamicLibraryManager::isSharedLibrary(FileName)) + return true; + + // No need to check linked libraries, as this function is only invoked + // for symbols that cannot be found (neither by dlsym nor in the JIT). 
+ if (dyLibManager.isLibraryLoaded(FileName.c_str())) + return true; + + + auto ObjF = llvm::object::ObjectFile::createObjectFile(FileName); + if (!ObjF) { + if (DEBUG > 1) + cling::errs() << "[DyLD] Failed to read object file " + << FileName << "\n"; + return true; + } + + llvm::object::ObjectFile *file = ObjF.get().getBinary(); + + if (DEBUG > 1) + cling::errs() << "Current executable format: " << s_ExecutableFormat + << ". Executable format of " << FileName << " : " + << file->getFileFormatName() << "\n"; + + // Ignore libraries with different format than the executing one. + if (s_ExecutableFormat != file->getFileFormatName()) + return true; + + if (llvm::isa(*file)) { + for (auto S : file->sections()) { + llvm::StringRef name; + S.getName(name); + if (name == ".text") { + // Check if the library has only debug symbols, usually when + // stripped with objcopy --only-keep-debug. This check is done by + // reading the manual of objcopy and inspection of stripped with + // objcopy libraries. + auto SecRef = static_cast(S); + if (SecRef.getType() == llvm::ELF::SHT_NOBITS) + return true; + + return (SecRef.getFlags() & llvm::ELF::SHF_ALLOC) == 0; + } + } + return true; + } + + //FIXME: Handle osx using isStripped after upgrading to llvm9. 
+ + return s_ShouldPermanentlyIgnoreCallback(FileName); +} + +void Dyld::ScanForLibraries(bool searchSystemLibraries/* = false*/) { + + // #ifndef NDEBUG + // if (!m_FirstRun && !m_FirstRunSysLib) + // assert(0 && "Already initialized"); + // if (m_FirstRun && !m_Libraries->size()) + // assert(0 && "Not initialized but m_Libraries is non-empty!"); + // // assert((m_FirstRun || m_FirstRunSysLib) && (m_Libraries->size() || + // m_SysLibraries.size()) + // // && "Already scanned and initialized!"); + // #endif + + const auto &searchPaths = m_DynamicLibraryManager.getSearchPaths(); + for (const cling::DynamicLibraryManager::SearchPathInfo &Info : searchPaths) { + if (Info.IsUser || searchSystemLibraries) { + // Examples which we should handle. + // File Real + // /lib/1/1.so /lib/1/1.so // file + // /lib/1/2.so->/lib/1/1.so /lib/1/1.so // file local link + // /lib/1/3.so->/lib/3/1.so /lib/3/1.so // file external link + // /lib/2->/lib/1 // path link + // /lib/2/1.so /lib/1/1.so // path link, file + // /lib/2/2.so->/lib/1/1.so /lib/1/1.so // path link, file local link + // /lib/2/3.so->/lib/3/1.so /lib/3/1.so // path link, file external link + // + // /lib/3/1.so + // /lib/3/2.so->/system/lib/s.so + // /lib/3/3.so + // /system/lib/1.so + // + // Paths = /lib/1 : /lib/2 : /lib/3 + + // m_BasePaths = ["/lib/1", "/lib/3", "/system/lib"] + // m_*Libraries = [<0,"1.so">, <1,"1.so">, <2,"s.so">, <1,"3.so">] + std::string RealPath = getRealPath(Info.Path); + llvm::StringRef DirPath(RealPath); + + if (!llvm::sys::fs::is_directory(DirPath) || DirPath.empty()) + continue; + + // Already searched? + bool WasInserted; + m_BasePaths.RegisterBasePath(RealPath, &WasInserted); + + if (!WasInserted) + continue; + + std::error_code EC; + for (llvm::sys::fs::directory_iterator DirIt(DirPath, EC), DirEnd; + DirIt != DirEnd && !EC; DirIt.increment(EC)) { + + // FIXME: Use a StringRef here! 
+ std::string FileName = getRealPath(DirIt->path()); + assert(!llvm::sys::fs::is_symlink_file(FileName)); + + if (shouldPermanentlyIgnore(FileName, m_DynamicLibraryManager)) + continue; + + std::string FileRealPath = llvm::sys::path::parent_path(FileName); + FileName = llvm::sys::path::filename(FileName); + const BasePath& BaseP = m_BasePaths.RegisterBasePath(FileRealPath); + LibraryPath LibPath(BaseP, FileName); + if (m_SysLibraries.HasRegisteredLib(LibPath) || + m_Libraries.HasRegisteredLib(LibPath)) + continue; + + if (searchSystemLibraries) + m_SysLibraries.RegisterLib(LibPath); + else + m_Libraries.RegisterLib(LibPath); + } + } + } +} + +void Dyld::BuildBloomFilter(LibraryPath* Lib, + llvm::object::ObjectFile *BinObjFile, + unsigned IgnoreSymbolFlags /*= 0*/) const { + assert(m_UseBloomFilter && "Bloom filter is disabled"); + assert(Lib->isBloomFilterEmpty() && "Already built!"); + + using namespace llvm; + using namespace llvm::object; + + // If BloomFilter is empty then build it. + // Count Symbols and generate BloomFilter + uint32_t SymbolsCount = 0; + std::list symbols; + for (const llvm::object::SymbolRef &S : BinObjFile->symbols()) { + uint32_t Flags = S.getFlags(); + // Do not insert in the table symbols flagged to ignore. + if (Flags & IgnoreSymbolFlags) + continue; + + // Note, we are at last resort and loading library based on a weak + // symbol is allowed. Otherwise, the JIT will issue an unresolved + // symbol error. + // + // There are other weak symbol kinds (marked as 'V') to denote + // typeinfo and vtables. It is unclear whether we should load such + // libraries or from which library we should resolve the symbol. + // We seem to not have a way to differentiate it from the symbol API. 
+ + llvm::Expected SymNameErr = S.getName(); + if (!SymNameErr) { + cling::errs()<< "Dyld::BuildBloomFilter: Failed to read symbol " + << SymNameErr.get() << "\n"; + continue; + } + + if (SymNameErr.get().empty()) + continue; + + ++SymbolsCount; + symbols.push_back(SymNameErr.get()); + } + + if (BinObjFile->isELF()) { + // ELF file format has .dynstr section for the dynamic symbol table. + const auto *ElfObj = cast(BinObjFile); + + for (const object::SymbolRef &S : ElfObj->getDynamicSymbolIterators()) { + uint32_t Flags = S.getFlags(); + // DO NOT insert to table if symbol was undefined + if (Flags & llvm::object::SymbolRef::SF_Undefined) + continue; + + // Note, we are at last resort and loading library based on a weak + // symbol is allowed. Otherwise, the JIT will issue an unresolved + // symbol error. + // + // There are other weak symbol kinds (marked as 'V') to denote + // typeinfo and vtables. It is unclear whether we should load such + // libraries or from which library we should resolve the symbol. + // We seem to not have a way to differentiate it from the symbol API. 
+ + llvm::Expected SymNameErr = S.getName(); + if (!SymNameErr) { + cling::errs() << "Dyld::BuildBloomFilter: Failed to read symbol " + < 7) + cling::errs() << "Dyld::BuildBloomFilter: No symbols!\n"; + return; + } + + if (DEBUG > 7) { + cling::errs() << "Dyld::BuildBloomFilter: Symbols:\n"; + for (auto it : symbols) + cling::errs() << "Dyld::BuildBloomFilter" << "- " << it << "\n"; + } + + Lib->InitializeBloomFilter(SymbolsCount); + // Generate BloomFilter + for (const auto &S : symbols) { + if (m_UseHashTable) + Lib->AddBloom(Lib->AddSymbol(S)); + else + Lib->AddBloom(S); + } +} + + +static llvm::StringRef GetGnuHashSection(llvm::object::ObjectFile *file) { + for (auto S : file->sections()) { + llvm::StringRef name; + S.getName(name); + if (name == ".gnu.hash") { + llvm::StringRef content; + S.getContents(content); + return content; + } + } + return ""; +} + +/// Bloom filter in a stohastic data structure which can tell us if a symbol +/// name does not exist in a library with 100% certainty. If it tells us it +/// exists this may not be true: +/// https://blogs.oracle.com/solaris/gnu-hash-elf-sections-v2 +/// +/// ELF has this optimization in the new linkers by default, It is stored in the +/// gnu.hash section of the object file. +/// +///\returns true if the symbol may be in the library. +static bool MayExistInElfObjectFile(llvm::object::ObjectFile *soFile, + uint32_t hash) { + assert(soFile->isELF() && "Not ELF"); + + // LLVM9: soFile->makeTriple().is64Bit() + const int bits = 8 * soFile->getBytesInAddress(); + + llvm::StringRef contents = GetGnuHashSection(soFile); + if (contents.size() < 16) + // We need to search if the library doesn't have .gnu.hash section! + return true; + const char* hashContent = contents.data(); + + // See https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/ for .gnu.hash + // table layout. 
+ uint32_t maskWords = *reinterpret_cast(hashContent + 8); + uint32_t shift2 = *reinterpret_cast(hashContent + 12); + uint32_t hash2 = hash >> shift2; + uint32_t n = (hash / bits) % maskWords; + + const char *bloomfilter = hashContent + 16; + const char *hash_pos = bloomfilter + n*(bits/8); // * (Bits / 8) + uint64_t word = *reinterpret_cast(hash_pos); + uint64_t bitmask = ( (1ULL << (hash % bits)) | (1ULL << (hash2 % bits))); + return (bitmask & word) == bitmask; +} + +bool Dyld::ContainsSymbol(const LibraryPath* Lib, + const std::string &mangledName, + unsigned IgnoreSymbolFlags /*= 0*/) const { + const std::string library_filename = Lib->GetFullName(); + + if (DEBUG > 7) { + cling::errs() << "Dyld::ContainsSymbol: Find symbol: lib=" + << library_filename << ", mangled=" + << mangledName << "\n"; + } + + auto ObjF = llvm::object::ObjectFile::createObjectFile(library_filename); + if (llvm::Error Err = ObjF.takeError()) { + if (DEBUG > 1) { + std::string Message; + handleAllErrors(std::move(Err), [&](llvm::ErrorInfoBase &EIB) { + Message += EIB.message() + "; "; + }); + cling::errs() << "Dyld::ContainsSymbol: Failed to read object file " + << library_filename << " Errors: " << Message << "\n"; + } + return false; + } + + llvm::object::ObjectFile *BinObjFile = ObjF.get().getBinary(); + + uint32_t hashedMangle = GNUHash(mangledName); + // Check for the gnu.hash section if ELF. + // If the symbol doesn't exist, exit early. + if (BinObjFile->isELF() && !MayExistInElfObjectFile(BinObjFile, hashedMangle)) + return false; + + if (m_UseBloomFilter) { + // Use our bloom filters and create them if necessary. + if (Lib->isBloomFilterEmpty()) + BuildBloomFilter(const_cast(Lib), BinObjFile, + IgnoreSymbolFlags); + + // If the symbol does not exist, exit early. In case it may exist, iterate. 
+ if (!Lib->MayExistSymbol(hashedMangle)) { + if (DEBUG > 7) + cling::errs() << "Dyld::ContainsSymbol: BloomFilter: Skip symbol.\n"; + return false; + } + if (DEBUG > 7) + cling::errs() << "Dyld::ContainsSymbol: BloomFilter: Symbol May exist." + << " Search for it."; + } + + if (m_UseHashTable) { + bool result = Lib->ExistSymbol(mangledName); + if (DEBUG > 7) + cling::errs() << "Dyld::ContainsSymbol: HashTable: Symbol " + << (result ? "Exist" : "Not exist") << "\n"; + return result; + } + + // Symbol may exist. Iterate. + + // If no hash symbol then iterate to detect symbol + // We Iterate only if BloomFilter and/or SymbolHashTable are not supported. + for (const llvm::object::SymbolRef &S : BinObjFile->symbols()) { + uint32_t Flags = S.getFlags(); + // Do not insert in the table symbols flagged to ignore. + if (Flags & IgnoreSymbolFlags) + continue; + + // Note, we are at last resort and loading library based on a weak + // symbol is allowed. Otherwise, the JIT will issue an unresolved + // symbol error. + // + // There are other weak symbol kinds (marked as 'V') to denote + // typeinfo and vtables. It is unclear whether we should load such + // libraries or from which library we should resolve the symbol. + // We seem to not have a way to differentiate it from the symbol API. + + llvm::Expected SymNameErr = S.getName(); + if (!SymNameErr) { + cling::errs() << "Dyld::ContainsSymbol: Failed to read symbol " + << mangledName << "\n"; + continue; + } + + if (SymNameErr.get().empty()) + continue; + + if (SymNameErr.get() == mangledName) { + if (DEBUG > 1) { + cling::errs() << "Dyld::ContainsSymbol: Symbol " + << mangledName << " found in " + << library_filename << "\n"; + return true; + } + } + } + + if (!BinObjFile->isELF()) + return false; + + // ELF file format has .dynstr section for the dynamic symbol table. 
+ const auto *ElfObj = llvm::cast(BinObjFile); + + for (const llvm::object::SymbolRef &S : ElfObj->getDynamicSymbolIterators()) { + uint32_t Flags = S.getFlags(); + // DO NOT insert to table if symbol was undefined + if (Flags & llvm::object::SymbolRef::SF_Undefined) + continue; + + // Note, we are at last resort and loading library based on a weak + // symbol is allowed. Otherwise, the JIT will issue an unresolved + // symbol error. + // + // There are other weak symbol kinds (marked as 'V') to denote + // typeinfo and vtables. It is unclear whether we should load such + // libraries or from which library we should resolve the symbol. + // We seem to not have a way to differentiate it from the symbol API. + + llvm::Expected SymNameErr = S.getName(); + if (!SymNameErr) { + cling::errs() << "Dyld::ContainsSymbol: Failed to read symbol " + << mangledName << "\n"; + continue; + } + + if (SymNameErr.get().empty()) + continue; + + if (SymNameErr.get() == mangledName) + return true; + } + return false; +} + +std::string Dyld::searchLibrariesForSymbol(const std::string& mangledName, + bool searchSystem/* = true*/) { + assert(!llvm::sys::DynamicLibrary::SearchForAddressOfSymbol(mangledName) && + "Library already loaded, please use dlsym!"); + assert(!mangledName.empty()); + using namespace llvm::sys::path; + using namespace llvm::sys::fs; + + if (m_FirstRun) { + ScanForLibraries(/* SearchSystemLibraries= */ false); + m_FirstRun = false; + } + + if (!m_QueriedLibraries.empty()) { + // Last call we were asked if a library contains a symbol. Usually, the + // caller wants to load this library. Check if was loaded and remove it + // from our lists of not-yet-loaded libs. 
+ + if (DEBUG > 7) { + cling::errs() << "Dyld::ResolveSymbol: m_QueriedLibraries:\n"; + size_t x = 0; + for (auto item : m_QueriedLibraries) { + cling::errs() << "Dyld::ResolveSymbol - [" << x++ << "]:" + << &item << ": " << item.m_Path << ", " + << item.m_LibName << "\n"; + } + } + + for (const LibraryPath& P : m_QueriedLibraries) { + const std::string LibName = P.GetFullName(); + if (!m_DynamicLibraryManager.isLibraryLoaded(LibName)) + continue; + + m_Libraries.UnregisterLib(P); + m_SysLibraries.UnregisterLib(P); + } + } + + // Iterate over files under this path. We want to get each ".so" files + for (const LibraryPath* P : m_Libraries.GetLibraries()) { + const std::string LibName = P->GetFullName(); + + if (ContainsSymbol(P, mangledName, /*ignore*/ + llvm::object::SymbolRef::SF_Undefined)) { + m_QueriedLibraries.push_back(*P); + return LibName; + } + } + + if (!searchSystem) + return ""; + + if (DEBUG > 7) + cling::errs() << "Dyld::ResolveSymbol: SearchSystem!\n"; + + // Lookup in non-system libraries failed. Expand the search to the system. 
+ if (m_FirstRunSysLib) { + ScanForLibraries(/* SearchSystemLibraries= */ true); + m_FirstRunSysLib = false; + } + + for (const LibraryPath* P : m_SysLibraries.GetLibraries()) { + const std::string LibName = P->GetFullName(); + if (ContainsSymbol(P, mangledName, /*ignore*/ + llvm::object::SymbolRef::SF_Undefined | + llvm::object::SymbolRef::SF_Weak)) { + m_QueriedLibraries.push_back(*P); + return LibName; + } + } + + if (DEBUG > 7) + cling::errs() << "Dyld::ResolveSymbol: Search found no match!\n"; + + /* + if (DEBUG > 7) { + cling::errs() << "Dyld::ResolveSymbol: Structs after ResolveSymbol:\n"); + + cling::errs() << "Dyld::ResolveSymbol - sPaths:\n"); + size_t x = 0; + for (const auto &item : sPaths.GetPaths()) + cling::errs() << "Dyld::ResolveSymbol << [" x++ << "]: " << item << "\n"; + + cling::errs() << "Dyld::ResolveSymbol - sLibs:\n"); + x = 0; + for (const auto &item : sLibraries.GetLibraries()) + cling::errs() << "Dyld::ResolveSymbol [" + << x++ << "]: " << item->Path << ", " + << item->LibName << "\n"; + + cling::errs() << "Dyld::ResolveSymbol - sSysLibs:"); + x = 0; + for (const auto &item : sSysLibraries.GetLibraries()) + cling::errs() << "Dyld::ResolveSymbol [" + << x++ << "]: " << item->Path << ", " + << item->LibName << "\n"; + + Info("Dyld::ResolveSymbol", "- sQueriedLibs:"); + x = 0; + for (const auto &item : sQueriedLibraries) + cling::errs() << "Dyld::ResolveSymbol [" + << x++ << "]: " << item->Path << ", " + << item->LibName << "\n"; + } + */ + + return ""; // Search found no match. +} +} // anon namespace + +// This function isn't referenced outside its translation unit, but it +// can't use the "static" keyword because its address is used for +// GetMainExecutable (since some platforms don't support taking the +// address of main, and some platforms can't implement GetMainExecutable +// without being given the address of a function in the main executable). 
+std::string GetExecutablePath() { + // This just needs to be some symbol in the binary; C++ doesn't + // allow taking the address of ::main however. + return cling::DynamicLibraryManager::getSymbolLocation(&GetExecutablePath); +} + +namespace cling { + void DynamicLibraryManager::initializeDyld( + std::function shouldPermanentlyIgnore) const { + assert(!s_IsDyldInitialized); + s_ShouldPermanentlyIgnoreCallback = shouldPermanentlyIgnore; + + std::string exeP = GetExecutablePath(); + auto ObjF = + cantFail(llvm::object::ObjectFile::createObjectFile(exeP)); + s_ExecutableFormat = ObjF.getBinary()->getFileFormatName(); + + s_IsDyldInitialized = true; + } + + std::string + DynamicLibraryManager::searchLibrariesForSymbol(const std::string& mangledName, + bool searchSystem/* = true*/) const { + assert(s_IsDyldInitialized && "Must call initialize dyld before!"); + static Dyld& dyld = Dyld::getInstance(*this); + return dyld.searchLibrariesForSymbol(mangledName, searchSystem); + } + + std::string DynamicLibraryManager::getSymbolLocation(void *func) { +#if defined(__CYGWIN__) && defined(__GNUC__) + return {}; +#elif defined(LLVM_ON_WIN32) + MEMORY_BASIC_INFORMATION mbi; + if (!VirtualQuery (func, &mbi, sizeof (mbi))) + return {}; + + HMODULE hMod = (HMODULE) mbi.AllocationBase; + char moduleName[MAX_PATH]; + + if (!GetModuleFileNameA (hMod, moduleName, sizeof (moduleName))) + return {}; + + return getRealPath(moduleName); +#else + // assume we have defined HAVE_DLFCN_H and HAVE_DLADDR + Dl_info info; + if (dladdr((void*)func, &info) == 0) { + // Not in a known shared library, let's give up + return {}; + } else { + if (strchr(info.dli_fname, '/')) + return getRealPath(info.dli_fname); + // Else absolute path. For all we know that's a binary. 
+ // Some people have dictionaries in binaries, this is how we find their + // path: (see also https://stackoverflow.com/a/1024937/6182509) +# if defined(__APPLE__) + char buf[PATH_MAX] = { 0 }; + uint32_t bufsize = sizeof(buf); + if (_NSGetExecutablePath(buf, &bufsize) >= 0) + return getRealPath(buf); + return getRealPath(info.dli_fname); +# elif defined(LLVM_ON_UNIX) + char buf[PATH_MAX] = { 0 }; + // Cross our fingers that /proc/self/exe exists. + if (readlink("/proc/self/exe", buf, sizeof(buf)) > 0) + return getRealPath(buf); + std::string pipeCmd = std::string("which \"") + info.dli_fname + "\""; + FILE* pipe = popen(pipeCmd.c_str(), "r"); + if (!pipe) + return getRealPath(info.dli_fname); + std::string result; + while (fgets(buf, sizeof(buf), pipe)) + result += buf; + + pclose(pipe); + return getRealPath(result); +# else +# error "Unsupported platform." +# endif + return {}; + } +#endif + } + +} // namespace cling